diff --git a/Llama-3.2-3B-Instruct_chunk1.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..a0f88447ea8d18392fe521e05f7f0cdbe07326e7 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0806d9561f1b977f8fb7c990502de9bc2576ac170b096b4b3479ca05c69b5db9 +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk1.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c1e56794a0b19a2e171e365e299a5a5f3b1c56a --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e7d9258e6a9d7d75508e04144cce26719dee2fd20b0953014c351af2d53f0f6a +size 409 diff --git a/Llama-3.2-3B-Instruct_chunk1.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5064b6f7ebc3ec2b533b2a66b67a7756f5a8cc71 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/metadata.json @@ -0,0 +1,105 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + 
"shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Select" : 2, + "Tile" : 2, + "Ios16.sub" : 3, + "Transpose" : 2, + "Ios16.gather" : 3, + "ExpandDims" : 3, + "Ios16.reshape" : 1, + "Ios16.maximum" : 1, + "Ios16.less" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 64]", + "name" : "input_ids", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1)", + "shortDescription" : "", + "shape" : "[1]", + "name" : "full_sequence_length", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk1", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk1.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..f01973a2b85322f7b32a41c3c9fa22862154752d --- /dev/null +++ 
b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/model.mil @@ -0,0 +1,50 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor full_sequence_length, tensor input_ids) { + tensor T = const()[name = tensor("T"), val = tensor([64])]; + tensor x_1_axis_0 = const()[name = tensor("x_1_axis_0"), val = tensor(0)]; + tensor x_1_batch_dims_0 = const()[name = tensor("x_1_batch_dims_0"), val = tensor(0)]; + tensor wte_weight_to_fp16 = const()[name = tensor("wte_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_1_cast_fp16 = gather(axis = x_1_axis_0, batch_dims = x_1_batch_dims_0, indices = input_ids, x = wte_weight_to_fp16)[name = tensor("x_1_cast_fp16")]; + tensor x_perm_0 = const()[name = tensor("x_perm_0"), val = tensor([0, 2, 1])]; + tensor var_27 = const()[name = tensor("op_27"), val = tensor([1, 3072, -1, 8])]; + tensor x_cast_fp16 = transpose(perm = x_perm_0, x = x_1_cast_fp16)[name = tensor("transpose_1")]; + tensor x = reshape(shape = var_27, x = x_cast_fp16)[name = tensor("op_28_cast_fp16")]; + tensor pos_offset = sub(x = T, y = full_sequence_length)[name = tensor("pos_offset")]; + tensor var_36 = const()[name = tensor("op_36"), val = tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63])]; + tensor input_pos_1 = sub(x = var_36, y = pos_offset)[name = tensor("input_pos_1")]; + tensor var_44 = const()[name = tensor("op_44"), val = tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0])]; + tensor input_pos = maximum(x = input_pos_1, y = var_44)[name = tensor("input_pos")]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(1)]; + tensor cos_batch_dims_0 = const()[name = tensor("cos_batch_dims_0"), val = tensor(0)]; + tensor var_54_to_fp16 = const()[name = tensor("op_54_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(788004992)))]; + tensor cos = gather(axis = var_55, batch_dims = cos_batch_dims_0, indices = input_pos, x = var_54_to_fp16)[name = tensor("cos_cast_fp16")]; + tensor var_66 = const()[name = tensor("op_66"), val = tensor(1)]; + tensor sin_batch_dims_0 = const()[name = tensor("sin_batch_dims_0"), val = tensor(0)]; + tensor var_65_to_fp16 = const()[name = tensor("op_65_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(788136128)))]; + tensor sin = gather(axis = var_66, batch_dims = sin_batch_dims_0, indices = input_pos, x = var_65_to_fp16)[name = tensor("sin_cast_fp16")]; + tensor var_102 = const()[name = tensor("op_102"), val = tensor([[0], [1], [2], [3], [4], [5], [6], [7], [8], [9], [10], [11], [12], [13], [14], [15], [16], [17], [18], [19], [20], [21], [22], [23], [24], [25], [26], [27], [28], [29], [30], [31], [32], [33], [34], [35], [36], [37], [38], [39], [40], [41], [42], [43], [44], [45], [46], [47], [48], [49], [50], [51], [52], [53], [54], [55], [56], [57], [58], [59], [60], [61], [62], [63]])]; + tensor var_105 = less(x = var_102, y = pos_offset)[name = tensor("op_105")]; + tensor var_105_after_broadcast_reps_0 = const()[name = tensor("op_105_after_broadcast_reps_0"), val = tensor([1, 512])]; + tensor var_105_after_broadcast = tile(reps = var_105_after_broadcast_reps_0, x = var_105)[name = tensor("op_105_after_broadcast")]; + tensor all_mask_to_fp16 = const()[name = tensor("all_mask_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(788267264)))]; + tensor m_1_to_fp16 = const()[name = tensor("m_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(788332864)))]; + tensor m_3_cast_fp16 = select(a = all_mask_to_fp16, b = m_1_to_fp16, cond = var_105_after_broadcast)[name = tensor("m_3_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 
344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 503, 504, 505, 506, 507, 508, 509, 510, 511])]; + tensor var_116 = const()[name = tensor("op_116"), val = tensor(512)]; + tensor var_118 = sub(x = var_116, y = full_sequence_length)[name = tensor("op_118")]; + tensor var_119 = less(x = var_115, y = var_118)[name = tensor("op_119")]; + tensor expand_dims_0_axes_0 = const()[name = tensor("expand_dims_0_axes_0"), val = tensor([0])]; + tensor expand_dims_0 = expand_dims(axes = expand_dims_0_axes_0, x = var_119)[name = tensor("expand_dims_0")]; + tensor var_119_after_broadcast_reps_0 = const()[name = tensor("op_119_after_broadcast_reps_0"), val = tensor([64, 1])]; + tensor var_119_after_broadcast = tile(reps = var_119_after_broadcast_reps_0, x = expand_dims_0)[name = tensor("op_119_after_broadcast")]; + tensor m_cast_fp16 = select(a = all_mask_to_fp16, b = m_3_cast_fp16, cond = var_119_after_broadcast)[name = tensor("m_cast_fp16")]; + tensor var_122_axes_0 = const()[name = tensor("op_122_axes_0"), val = tensor([0])]; + tensor var_122_cast_fp16 = expand_dims(axes = var_122_axes_0, x = m_cast_fp16)[name = tensor("op_122_cast_fp16")]; + tensor mask_axes_0 = const()[name = tensor("mask_axes_0"), val = tensor([0])]; + tensor mask_cast_fp16 = 
expand_dims(axes = mask_axes_0, x = var_122_cast_fp16)[name = tensor("mask_cast_fp16")]; + tensor var_129 = const()[name = tensor("op_129"), val = tensor([0, 3, 1, 2])]; + tensor mask = transpose(perm = var_129, x = mask_cast_fp16)[name = tensor("transpose_0")]; + } -> (x, cos, sin, mask); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk1.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..234764e3927fd6e9e89f3588f6f71421c95a623d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk1.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6f15abcf5514d401e17446e479bffc8f51867d8bec5ad4b84751ed31b378192 +size 788398464 diff --git a/Llama-3.2-3B-Instruct_chunk10.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk10.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk10.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..4912583441b6a93c0eddb6ab0a90ff9e17e7c228 --- /dev/null +++ 
b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + 
"watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + 
"shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk10", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk10.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = 
x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = 
tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = 
tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor 
var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = 
const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor 
var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = 
var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, 
end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = 
q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = 
tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor 
var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = 
tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 
0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name 
= tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = 
tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor 
var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 
= const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = 
var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = 
const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = 
add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = 
var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = 
tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = 
const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, 
var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor 
attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y 
= blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = 
tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = 
const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name 
= tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = 
const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = 
tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 
64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = 
tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, 
false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = 
slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = 
var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = 
var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = 
var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = 
var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = 
var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end 
= var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = 
tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = 
tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = 
(var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + 
tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = 
var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = 
var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = 
softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, 
values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = 
const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = 
var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = 
reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x 
= x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name 
= tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk10.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..69e886b3711c8507e63bb0a32cb390c36f5a9777 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk10.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:943564e130d797200225f5eaeef339030c6c3c01691819963fe7ef78303c8545 +size 402679744 diff --git a/Llama-3.2-3B-Instruct_chunk11.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ 
b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk11.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk11.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..45870338d8d760107fb047595a2394be13aca491 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 
1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + 
"hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk11", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk11.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor 
cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor 
input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = 
tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, 
interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, 
false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = 
slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = 
var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = 
var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name 
= tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor 
var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = 
tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 
0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = 
const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = 
tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = 
tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor 
var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 
= const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = 
var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = 
const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = 
var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + 
tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor 
var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, 
var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = 
const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, 
var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = 
x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = 
const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val 
= tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = 
tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + 
tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = 
tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = 
const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor 
var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = 
var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = 
var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = 
q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = 
k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = 
k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = 
v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = 
v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = 
tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = 
tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = 
(var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + 
tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = 
tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = 
tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor 
var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = 
einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + 
tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = 
x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = 
var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = 
tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk11.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9c855b7b9002f9da80db355b0c7893d31cbd976 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk11.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d05d3ea64682e5a7113cac2eafb36ecb827788c42f94832abe42470a06e6bd90 +size 402679744 diff --git a/Llama-3.2-3B-Instruct_chunk12.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk12.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk12.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..0764699f180c1456e847c7b2e0f5b8c1083d8acc --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + 
"metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + 
"name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : 
"MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk12", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk12.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + 
tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + 
tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name 
= tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, 
x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = 
k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, 
end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, 
x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = 
tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor 
var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = 
tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 
2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = 
const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val 
= tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor 
var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = 
tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = 
tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, 
var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = 
tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = 
tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor 
var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = 
var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), 
val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = 
tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = 
const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = 
blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = 
tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = 
const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name 
= tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = 
const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = 
tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 
64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = 
tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, 
false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = 
slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = 
var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = 
var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = 
var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = 
var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = 
var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end 
= var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = 
tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = 
tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = 
(var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + 
tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = 
var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = 
var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = 
softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, 
values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = 
const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = 
var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = 
reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x 
= x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name 
= tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk12.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..19e4bcbb29ffc6423e122439ad9ab823c1e478e2 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk12.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b78ea2d679f5c65ebc0a051180df02861df9dfad72fdbd6d7da795e6effcbd4 +size 402679744 diff --git a/Llama-3.2-3B-Instruct_chunk13.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ 
b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk13.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..3fed05170d981b8582c9421ec7550f748512caf2 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b45f96f9ba201e16f197a78412041f41d2ac869df9ad95ef03af7662e7d940 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk13.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..042152024e0a7d922ca0457b0fcb16c0a03410bb --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 
1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + 
"hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk13", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk13.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor 
cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor 
input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = 
tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, 
interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, 
false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = 
slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = 
var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = 
var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name 
= tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor 
var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = 
tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 
0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = 
const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = 
tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = 
tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor 
var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 
= const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = 
var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = 
const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = 
var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + 
tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor 
var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, 
var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = 
const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, 
var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = 
x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = 
const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val 
= tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = 
tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + 
tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = 
tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = 
const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor 
var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = 
var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = 
var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = 
q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = 
k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = 
k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = 
v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = 
v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = 
tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = 
tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = 
(var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + 
tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = 
tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = 
tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor 
var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = 
einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + 
tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = 
x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = 
var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = 
tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk13.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..0303ebcb6e35d02a6ae9c5e453d020529c288a93 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk13.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c010e4d1d6fba27b10c2fc842fe9244994286e621309812a88c461ddeb071342 +size 402679744 diff --git a/Llama-3.2-3B-Instruct_chunk14.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk14.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk14.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..ae31a663181f3e6c6f748d526d461429f1134e53 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + 
"metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + 
"name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : 
"MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk14", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk14.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + 
tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + 
tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name 
= tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, 
x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = 
k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, 
end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, 
x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = 
tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor 
var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = 
tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 
2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = 
const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val 
= tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor 
var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = 
tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = 
tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, 
var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = 
tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = 
tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor 
var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = 
var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), 
val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = 
tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = 
const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = 
blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = 
tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = 
const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name 
= tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = 
const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = 
tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 
64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = 
tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, 
false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = 
slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = 
var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = 
var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = 
var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = 
var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = 
var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end 
= var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = 
tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = 
tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = 
(var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + 
tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = 
var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = 
var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = 
softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, 
values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = 
const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = 
var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = 
reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x 
= x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name 
= tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk14.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..5f7055194b4048a816ee80c4e4a8e28c1386f4ea --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk14.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f154c6a8553b1baf60e07b7f98801702aa94b24c825db6651c5dea4d28f0c0b6 +size 402679744 diff --git a/Llama-3.2-3B-Instruct_chunk15.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ 
b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk15.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk15.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..c90ee9167173ecd1abc42ad897a8e971d3f9c882 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 
1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + 
"hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk15", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk15.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor 
cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor 
input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = 
tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, 
interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, 
false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = 
slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = 
var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = 
var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name 
= tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor 
var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = 
tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 
0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = 
const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = 
tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = 
tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor 
var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 
= const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = 
var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = 
const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = 
var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + 
tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor 
var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, 
var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = 
const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, 
var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = 
x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = 
const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val 
= tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor 
blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = 
tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + 
tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = 
tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = 
const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor 
var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = 
var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = 
var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = 
q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = 
k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = 
k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = 
v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = 
v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = 
tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = 
tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = 
(var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + 
tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = 
tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = 
tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor 
var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = 
einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + 
tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = 
x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = 
var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = 
tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk15.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..9fb4cdb8f6ee656a85f4d01e3c9a1c1ca0e66302 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk15.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2cb9d6d8288946add36334ed30dca14912099e940d2f8989248bac042f9112d1 +size 402679744 diff --git a/Llama-3.2-3B-Instruct_chunk16.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..cbeeb3500b890cb12054b285c60ccf293726d4b5 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:099d892d9754805b9d755f8a563efcf8322cc8319dee028d51b62ca558115cb5 +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk16.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..baa88d80a77c4c03df2caf2dc6b2b21a213555e2 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:351176279ef47a4a1ba4a1c69135c7c59e5181f28a16fe3d47f04bd2a80c5863 +size 501 diff --git a/Llama-3.2-3B-Instruct_chunk16.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2ad09b4845167243f7fc4b556669fb7d84f08c3b --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/metadata.json @@ -0,0 +1,134 @@ +[ + { + 
"metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_3", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_4", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_5", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_6", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 13568)", + 
"shortDescription" : "", + "shape" : "[1, 64, 13568]", + "name" : "logits_7", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 1, + "Ios16.mul" : 2, + "Squeeze" : 1, + "Transpose" : 1, + "Ios16.reshape" : 10, + "Ios16.matmul" : 8, + "Ios16.realDiv" : 1, + "Ios16.reduceL2Norm" : 1 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk16", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk16.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..42266367a5b8a44218c5b7d2e5cc7770217a6081 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/model.mil @@ -0,0 +1,74 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor x) { + tensor var_6 = const()[name = tensor("op_6"), val = tensor(true)]; + tensor var_9 = const()[name = tensor("op_9"), val = tensor(1)]; + tensor 
x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_cast_fp16 = concat(axis = var_9, interleave = x_eps_interleave_0, values = (x, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_6, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_34_to_fp16 = const()[name = tensor("op_34_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_34_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor ln_f_weight_to_fp16 = const()[name = tensor("ln_f_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = ln_f_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_48 = const()[name = tensor("op_48"), val = tensor([1, 3072, 1, -1])]; + tensor x_cast_fp16 = reshape(shape = var_48, x = x_5_cast_fp16)[name = tensor("x_cast_fp16")]; + tensor var_51_axes_0 = const()[name = tensor("op_51_axes_0"), val = tensor([2])]; + tensor var_51_cast_fp16 = squeeze(axes = var_51_axes_0, x = x_cast_fp16)[name = tensor("op_51_cast_fp16")]; + tensor var_54_perm_0 = const()[name = tensor("op_54_perm_0"), val = tensor([0, 2, 1])]; + tensor concat_4 = const()[name = tensor("concat_4"), val = tensor([64, 3072])]; + tensor var_54_cast_fp16 = transpose(perm = var_54_perm_0, x = var_51_cast_fp16)[name = tensor("transpose_16")]; + tensor reshape_0_cast_fp16 = reshape(shape = concat_4, x = var_54_cast_fp16)[name = 
tensor("reshape_0_cast_fp16")]; + tensor matmul_0_transpose_x_0 = const()[name = tensor("matmul_0_transpose_x_0"), val = tensor(false)]; + tensor matmul_0_transpose_y_0 = const()[name = tensor("matmul_0_transpose_y_0"), val = tensor(false)]; + tensor transpose_1_to_fp16 = const()[name = tensor("transpose_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor matmul_0_cast_fp16 = matmul(transpose_x = matmul_0_transpose_x_0, transpose_y = matmul_0_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_1_to_fp16)[name = tensor("matmul_0_cast_fp16")]; + tensor concat_8 = const()[name = tensor("concat_8"), val = tensor([1, 64, 16384])]; + tensor logits_0 = reshape(shape = concat_8, x = matmul_0_cast_fp16)[name = tensor("reshape_2_cast_fp16")]; + tensor matmul_1_transpose_x_0 = const()[name = tensor("matmul_1_transpose_x_0"), val = tensor(false)]; + tensor matmul_1_transpose_y_0 = const()[name = tensor("matmul_1_transpose_y_0"), val = tensor(false)]; + tensor transpose_3_to_fp16 = const()[name = tensor("transpose_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100669824)))]; + tensor matmul_1_cast_fp16 = matmul(transpose_x = matmul_1_transpose_x_0, transpose_y = matmul_1_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_3_to_fp16)[name = tensor("matmul_1_cast_fp16")]; + tensor concat_16 = const()[name = tensor("concat_16"), val = tensor([1, 64, 16384])]; + tensor logits_1 = reshape(shape = concat_16, x = matmul_1_cast_fp16)[name = tensor("reshape_5_cast_fp16")]; + tensor matmul_2_transpose_x_0 = const()[name = tensor("matmul_2_transpose_x_0"), val = tensor(false)]; + tensor matmul_2_transpose_y_0 = const()[name = tensor("matmul_2_transpose_y_0"), val = tensor(false)]; + tensor transpose_5_to_fp16 = const()[name = tensor("transpose_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201333184)))]; + tensor 
matmul_2_cast_fp16 = matmul(transpose_x = matmul_2_transpose_x_0, transpose_y = matmul_2_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_5_to_fp16)[name = tensor("matmul_2_cast_fp16")]; + tensor concat_24 = const()[name = tensor("concat_24"), val = tensor([1, 64, 16384])]; + tensor logits_2 = reshape(shape = concat_24, x = matmul_2_cast_fp16)[name = tensor("reshape_8_cast_fp16")]; + tensor matmul_3_transpose_x_0 = const()[name = tensor("matmul_3_transpose_x_0"), val = tensor(false)]; + tensor matmul_3_transpose_y_0 = const()[name = tensor("matmul_3_transpose_y_0"), val = tensor(false)]; + tensor transpose_7_to_fp16 = const()[name = tensor("transpose_7_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(301996544)))]; + tensor matmul_3_cast_fp16 = matmul(transpose_x = matmul_3_transpose_x_0, transpose_y = matmul_3_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_7_to_fp16)[name = tensor("matmul_3_cast_fp16")]; + tensor concat_32 = const()[name = tensor("concat_32"), val = tensor([1, 64, 16384])]; + tensor logits_3 = reshape(shape = concat_32, x = matmul_3_cast_fp16)[name = tensor("reshape_11_cast_fp16")]; + tensor matmul_4_transpose_x_0 = const()[name = tensor("matmul_4_transpose_x_0"), val = tensor(false)]; + tensor matmul_4_transpose_y_0 = const()[name = tensor("matmul_4_transpose_y_0"), val = tensor(false)]; + tensor transpose_9_to_fp16 = const()[name = tensor("transpose_9_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(402659904)))]; + tensor matmul_4_cast_fp16 = matmul(transpose_x = matmul_4_transpose_x_0, transpose_y = matmul_4_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_9_to_fp16)[name = tensor("matmul_4_cast_fp16")]; + tensor concat_40 = const()[name = tensor("concat_40"), val = tensor([1, 64, 16384])]; + tensor logits_4 = reshape(shape = concat_40, x = matmul_4_cast_fp16)[name = tensor("reshape_14_cast_fp16")]; + tensor matmul_5_transpose_x_0 
= const()[name = tensor("matmul_5_transpose_x_0"), val = tensor(false)]; + tensor matmul_5_transpose_y_0 = const()[name = tensor("matmul_5_transpose_y_0"), val = tensor(false)]; + tensor transpose_11_to_fp16 = const()[name = tensor("transpose_11_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(503323264)))]; + tensor matmul_5_cast_fp16 = matmul(transpose_x = matmul_5_transpose_x_0, transpose_y = matmul_5_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_11_to_fp16)[name = tensor("matmul_5_cast_fp16")]; + tensor concat_48 = const()[name = tensor("concat_48"), val = tensor([1, 64, 16384])]; + tensor logits_5 = reshape(shape = concat_48, x = matmul_5_cast_fp16)[name = tensor("reshape_17_cast_fp16")]; + tensor matmul_6_transpose_x_0 = const()[name = tensor("matmul_6_transpose_x_0"), val = tensor(false)]; + tensor matmul_6_transpose_y_0 = const()[name = tensor("matmul_6_transpose_y_0"), val = tensor(false)]; + tensor transpose_13_to_fp16 = const()[name = tensor("transpose_13_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(603986624)))]; + tensor matmul_6_cast_fp16 = matmul(transpose_x = matmul_6_transpose_x_0, transpose_y = matmul_6_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_13_to_fp16)[name = tensor("matmul_6_cast_fp16")]; + tensor concat_56 = const()[name = tensor("concat_56"), val = tensor([1, 64, 16384])]; + tensor logits_6 = reshape(shape = concat_56, x = matmul_6_cast_fp16)[name = tensor("reshape_20_cast_fp16")]; + tensor matmul_7_transpose_x_0 = const()[name = tensor("matmul_7_transpose_x_0"), val = tensor(false)]; + tensor matmul_7_transpose_y_0 = const()[name = tensor("matmul_7_transpose_y_0"), val = tensor(false)]; + tensor transpose_15_to_fp16 = const()[name = tensor("transpose_15_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(704649984)))]; + tensor matmul_7_cast_fp16 = matmul(transpose_x = 
matmul_7_transpose_x_0, transpose_y = matmul_7_transpose_y_0, x = reshape_0_cast_fp16, y = transpose_15_to_fp16)[name = tensor("matmul_7_cast_fp16")]; + tensor concat_64 = const()[name = tensor("concat_64"), val = tensor([1, 64, 13568])]; + tensor logits_7 = reshape(shape = concat_64, x = matmul_7_cast_fp16)[name = tensor("reshape_23_cast_fp16")]; + } -> (logits_0, logits_1, logits_2, logits_3, logits_4, logits_5, logits_6, logits_7); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk16.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..6c17545ff590d111e236c588b117c45f2c80f0dc --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk16.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fd01e5605ef4a5bebc9bcafc5514e012c4c605231c065a1d634fd9fc66df411 +size 788011840 diff --git a/Llama-3.2-3B-Instruct_chunk2.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk2.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..3fed05170d981b8582c9421ec7550f748512caf2 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b45f96f9ba201e16f197a78412041f41d2ac869df9ad95ef03af7662e7d940 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk2.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/metadata.json new 
file mode 100644 index 0000000000000000000000000000000000000000..d412b526b988fdf6d178a4c78fced9a569ebadcf --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, 
Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + 
"isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk2", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk2.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = 
blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = 
tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = 
tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = 
tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor 
var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = 
tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 
1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 
= const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = 
tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = 
var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask 
= var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = 
tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, 
var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = 
tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor 
aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = 
var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = 
tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = 
var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), 
val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = 
tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = 
tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor 
blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = 
tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = 
norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor 
k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; 
+ tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor 
var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor 
k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = 
tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = 
const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = 
tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = 
const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = 
tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 
3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 
512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + 
tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + 
tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; 
+ tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = 
tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = 
tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = 
mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = 
tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = 
tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 
= add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 
= add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = 
var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = 
tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, 
var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, 
var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = 
x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; 
+ tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk2.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..9f8251fc3213f2ba5fe5d32a78480bd28428d83d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d49db7568f50fc8f361e2b4cca2888853752a4784bf7a44b06842bebf37ed2b +size 402679744 diff --git 
a/Llama-3.2-3B-Instruct_chunk3.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk3.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk3.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..2d7f3b5135fb936c65404b61a976a65ba64ccdf7 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + 
"shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + 
{ + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk3", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk3.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = 
dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), 
val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor 
const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name 
= tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + 
tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = 
tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 
64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = 
const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), 
val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + 
tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = 
slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end 
= var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = 
var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = 
tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = 
const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = 
var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = 
const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = 
var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val 
= tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, 
var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = 
tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor 
var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + 
tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values 
= (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; 
+ tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = 
const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = 
tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = 
tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = 
tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = 
conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, 
values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor 
var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = 
tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + 
tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = 
tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 
64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = 
tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = 
const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor 
var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = 
tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = 
tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = 
(var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = 
add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = 
var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = 
einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + 
tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = 
tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = 
blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = 
tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = 
conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk3.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..ddc414b805bbeaeee2f33bbeffef46d892a80c51 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk3.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:934f835704cd4576365155fea7f05c7308ec2dc8b0c69d6d800fdc6e646ea0ce +size 402679744 diff --git a/Llama-3.2-3B-Instruct_chunk4.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk4.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..3fed05170d981b8582c9421ec7550f748512caf2 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b45f96f9ba201e16f197a78412041f41d2ac869df9ad95ef03af7662e7d940 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk4.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/metadata.json new file mode 100644 index 
0000000000000000000000000000000000000000..078216a203d40b09afa365c7afa972ffa9fe3493 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + 
"isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : 
"1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk4", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk4.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = 
blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = 
tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = 
tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = 
tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor 
var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = 
tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 
1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 
= const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = 
tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = 
var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask 
= var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = 
tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, 
var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = 
tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor 
aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = 
var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = 
tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = 
var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), 
val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = 
tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = 
tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor 
blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = 
tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = 
norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor 
k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; 
+ tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor 
var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor 
k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = 
tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = 
const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = 
tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = 
const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = 
tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 
3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 
512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + 
tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + 
tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; 
+ tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = 
tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = 
tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = 
mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = 
tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = 
tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 
= add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 
= add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = 
var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = 
tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, 
var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, 
var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = 
x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; 
+ tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk4.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..a95364215864b82077bb488e2e8659af191e985a --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk4.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c5e404a08c1b8eb56e7784313a6ca21b01bc54978e598f1b184029e8f613e43 +size 402679744 diff --git 
a/Llama-3.2-3B-Instruct_chunk5.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk5.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk5.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..dc528f3f5b0976cb12d6bc6af184b91a12c1efa4 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + 
"shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + 
{ + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk5", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk5.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = 
dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), 
val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor 
const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name 
= tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + 
tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = 
tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 
64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = 
const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), 
val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + 
tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = 
slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end 
= var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = 
var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = 
tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = 
const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = 
var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = 
const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = 
var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val 
= tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, 
var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = 
tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor 
var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + 
tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values 
= (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; 
+ tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = 
const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = 
tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = 
tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = 
tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = 
conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, 
values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor 
var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = 
tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + 
tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = 
tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 
64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = 
tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = 
const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor 
var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = 
tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = 
tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = 
(var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = 
add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = 
var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = 
einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + 
tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = 
tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = 
blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = 
tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = 
conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk5.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..5c591a4d84403719c2261df43f73889f84514e4b --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk5.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:65a458c63e278bc37e0b52a60e57c256764913cdd7e72c783179a10c9df8a554 +size 402679744 diff --git a/Llama-3.2-3B-Instruct_chunk6.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk6.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk6.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/metadata.json new file mode 100644 index 
0000000000000000000000000000000000000000..88ef5c7d29a3edb397ad09f9fbc49d61a6194f0d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + 
"isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : 
"1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk6", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk6.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = 
blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = 
tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = 
tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = 
tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor 
var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = 
tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 
1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 
= const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = 
tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = 
var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask 
= var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = 
tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, 
var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = 
tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor 
aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = 
var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = 
tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = 
var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), 
val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = 
tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = 
tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor 
blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = 
tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = 
norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor 
k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; 
+ tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor 
var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor 
k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = 
tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = 
const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = 
tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = 
const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = 
tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 
3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 
512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + 
tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + 
tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; 
+ tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = 
tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = 
tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = 
mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = 
tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = 
tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 
= add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 
= add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = 
var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = 
tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, 
var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, 
var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = 
x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; 
+ tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk6.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..d35abe65b9bcbba35955389a8d068fb90b334461 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk6.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a03a9e7bc84b5fedc7469722a7cad217dff44099167118db3559a706ff7b701 +size 402679744 diff --git 
a/Llama-3.2-3B-Instruct_chunk7.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk7.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk7.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..50c03439bcb5ac3b637aea87cd1a35808af38d03 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + 
"shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + 
{ + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk7", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk7.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = 
dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), 
val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor 
const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name 
= tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + 
tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = 
tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 
64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = 
const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), 
val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + 
tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = 
slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end 
= var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = 
var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = 
tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = 
const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = 
var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = 
const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = 
var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val 
= tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, 
var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = 
tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor 
var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + 
tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values 
= (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; 
+ tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = 
const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = 
tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = 
tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = 
tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = 
conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, 
values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor 
var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = 
tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + 
tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = 
tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 
64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = 
tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = 
const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor 
var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = 
tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = 
tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = 
(var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = 
add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = 
var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = 
einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + 
tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = 
tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = 
blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = 
tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = 
conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk7.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e755d1ce103aa4cf05bef7c1ab7593f30ea61d7 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk7.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed0c78f79c19079e49b7985ac1ce866746a185e629e010b7c2536ed467b24564 +size 402679744 diff --git a/Llama-3.2-3B-Instruct_chunk8.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk8.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..3fed05170d981b8582c9421ec7550f748512caf2 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55b45f96f9ba201e16f197a78412041f41d2ac869df9ad95ef03af7662e7d940 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk8.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/metadata.json new file mode 100644 index 
0000000000000000000000000000000000000000..cf8bd9cb9c766b10c198630e7b6b8aa3a1a050a1 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + 
"isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : 
"1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk8", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk8.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = 
tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = 
blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = 
tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = 
tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = 
tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor 
var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = 
tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 
1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 
= const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = 
tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor 
var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = 
var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask 
= var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = 
tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, 
var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = 
tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = 
tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = 
tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor 
aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = 
var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = 
tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = 
var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), 
val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = 
tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = 
tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor 
blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = 
tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = 
norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor 
k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; 
+ tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor 
var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor 
k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = 
tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = 
const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = 
tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = 
const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = 
tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 
3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 
512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + 
tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + 
tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; 
+ tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = 
tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = 
tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = 
mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = 
tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = 
tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 
= add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 
= add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = 
var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = 
tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, 
var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, 
var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = 
x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; 
+ tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk8.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..e676f21da72dcaa7df5a68e36aafc518dd39b25c --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk8.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22a969fcfd92d31cf91e96a0621eb9737df6c6dfe11175f1ab9b7138f88a88ac +size 402679744 diff --git 
a/Llama-3.2-3B-Instruct_chunk9.mlmodelc/analytics/coremldata.bin b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..6a63af39cde8e590e41fffd270ab8aede737490d --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf21e446e7587de3fd840eae95f3e79729298df568725552f7ef5fd8f954e58c +size 243 diff --git a/Llama-3.2-3B-Instruct_chunk9.mlmodelc/coremldata.bin b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..ef844658693d8a7fc2951abf2761f8f5f9bc62c3 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8129d684aa1ea8b76708a186fe44f7ffc4aa08b4854907105fe41c0825e71875 +size 653 diff --git a/Llama-3.2-3B-Instruct_chunk9.mlmodelc/metadata.json b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..d8c72890b21d6a9ba891839a3cf16aff57935ee8 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/metadata.json @@ -0,0 +1,178 @@ +[ + { + "metadataOutputVersion" : "3.0", + "storagePrecision" : "Float16", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "new_x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + 
"shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache_1", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Concat" : 14, + "Ios16.mul" : 70, + "SliceByIndex" : 88, + "Transpose" : 2, + "Ios16.einsum" : 96, + "Ios16.conv" : 14, + "Ios16.add" : 56, + "Ios16.realDiv" : 4, + "Ios16.softmax" : 48, + "Ios16.reduceL2Norm" : 4, + "Ios16.reshape" : 14, + "Ios16.silu" : 2 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 3072 × 8 × 8)", + "shortDescription" : "", + "shape" : "[1, 3072, 8, 8]", + "name" : "x", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "cos", + "type" : "MultiArray" + }, + 
{ + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 128 × 64)", + "shortDescription" : "", + "shape" : "[128, 64]", + "name" : "sin", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 512 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 512, 1, 64]", + "name" : "mask", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)?", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "k_cache_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "1", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)?", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "v_cache_1", + "type" : "MultiArray" + } + ], + "generatedClassName" : "Llama_3_2_3B_Instruct_2024_11_09_16_14_37_chunk9", + "method" : "predict" + } +] \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk9.mlmodelc/model.mil b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..78594b4291dc45ae43652f9a31200581b19ad3c6 --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/model.mil @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = 
dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = 
tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), 
val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor 
const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name 
= tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + 
tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = 
tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 
64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = 
const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), 
val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + 
tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = 
slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end 
= var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = 
var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = 
tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = 
const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = 
var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = 
const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = 
var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val 
= tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, 
var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = 
tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor 
var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + 
tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values 
= (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = 
tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; 
+ tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = 
const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = 
tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = 
tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = 
tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = 
conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, 
values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor 
var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = 
tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + 
tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = 
tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 
64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = 
tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = 
tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = 
const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor 
var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = 
tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = 
tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor 
var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = 
(var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = 
add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = 
var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = 
einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + 
tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor 
var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = 
tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = 
blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = 
tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = 
conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file diff --git a/Llama-3.2-3B-Instruct_chunk9.mlmodelc/weights/weight.bin b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/weights/weight.bin new file mode 100644 index 0000000000000000000000000000000000000000..62d28c7a01c5bae87fe29ec74613211c42ca660c --- /dev/null +++ b/Llama-3.2-3B-Instruct_chunk9.mlmodelc/weights/weight.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22ea162c1647c03b50865bc80a45486ce5e1894f3a99303ffd2eb6130ddd03ea +size 402679744 diff --git a/README.md b/README.md index f56069a87407fbb7a92032413000c0bc73ba8e6f..e531936aeba496b6b6fcf20ef4293426582dc3ca 100644 --- a/README.md +++ b/README.md @@ -7,4 +7,7 @@ tags: --- CoreML conversion of Llama-3.2-3B-Instruct with a 512 context length. Optimized for Apple Neural Engine. -Use [this CLI](https://github.com/smpanaro/coreml-llm-cli) to download and run inference. macOS 14 (Sonoma) is required. \ No newline at end of file +Use [this CLI](https://github.com/smpanaro/coreml-llm-cli) to download and run inference. macOS 14 (Sonoma) is required. + +> [!IMPORTANT] +> This model will likely run slowly or not at all on M1 Macs and phones.
Consider trying the 1B model for those devices: [smpanaro/Llama-3.2-1B-Instruct-CoreML](https://huggingface.co/smpanaro/Llama-3.2-1B-Instruct-CoreML) diff --git a/cache-processor.mlmodelc/analytics/coremldata.bin b/cache-processor.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..d9bd15c91118d80231cd2259c2b4c6c413d8a13f --- /dev/null +++ b/cache-processor.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d8cf834d873b9ea34d7e6bb1e91b85769921caaefb423f8436aa4b9dd0df2e83 +size 243 diff --git a/cache-processor.mlmodelc/coremldata.bin b/cache-processor.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..3f43a5cb71c8fa96309501d24c5f22312ac1829a --- /dev/null +++ b/cache-processor.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68d2e54606b35f52fddfec1f3deac45d4faf67b1cf1361355809e2a70b28c854 +size 516 diff --git a/cache-processor.mlmodelc/metadata.json b/cache-processor.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..5ed61ea98bb3055414244128b7df7b0cb5490cae --- /dev/null +++ b/cache-processor.mlmodelc/metadata.json @@ -0,0 +1,109 @@ +[ + { + "metadataOutputVersion" : "3.0", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "updated_k_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "updated_v_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray 
(Float16)", + "shortDescription" : "", + "shape" : "[]", + "name" : "ignore_me_im_only_here_so_this_runs_on_the_ane", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "SliceByIndex" : 2, + "Ios16.mul" : 1, + "Concat" : 2, + "Ios16.reduceMin" : 1 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 448 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 448, 1, 1024]", + "name" : "old_k_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 1 × 1024)", + "shortDescription" : "", + "shape" : "[1, 64, 1, 1024]", + "name" : "new_k_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 448)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 448]", + "name" : "old_v_cache", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 1024 × 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 1024, 1, 64]", + "name" : "new_v_cache", + "type" : "MultiArray" + } + ], + "generatedClassName" : "cache_processor_l3_2_3b", + "method" : "predict" + } +] \ No newline at end of file 
diff --git a/cache-processor.mlmodelc/model.mil b/cache-processor.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..7b611aa68d184f0e9c256fd11c114a4d087f5db4 --- /dev/null +++ b/cache-processor.mlmodelc/model.mil @@ -0,0 +1,24 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor new_k_cache, tensor new_v_cache, tensor old_k_cache, tensor old_v_cache) { + tensor var_6 = const()[name = tensor("op_6"), val = tensor(-3)]; + tensor cat_k_1_interleave_0 = const()[name = tensor("cat_k_1_interleave_0"), val = tensor(false)]; + tensor cat_k_1_cast_fp16 = concat(axis = var_6, interleave = cat_k_1_interleave_0, values = (old_k_cache, new_k_cache))[name = tensor("cat_k_1_cast_fp16")]; + tensor var_9 = const()[name = tensor("op_9"), val = tensor(-1)]; + tensor cat_v_interleave_0 = const()[name = tensor("cat_v_interleave_0"), val = tensor(false)]; + tensor cat_v_cast_fp16 = concat(axis = var_9, interleave = cat_v_interleave_0, values = (old_v_cache, new_v_cache))[name = tensor("cat_v_cast_fp16")]; + tensor var_20_begin_0 = const()[name = tensor("op_20_begin_0"), val = tensor([0, 64, 0, 0])]; + tensor var_20_end_0 = const()[name = tensor("op_20_end_0"), val = tensor([1, 3072, 1, 1024])]; + tensor var_20_end_mask_0 = const()[name = tensor("op_20_end_mask_0"), val = tensor([true, false, true, true])]; + tensor updated_k_cache = slice_by_index(begin = var_20_begin_0, end = var_20_end_0, end_mask = var_20_end_mask_0, x = cat_k_1_cast_fp16)[name = tensor("op_20_cast_fp16")]; + tensor var_50_begin_0 = const()[name = tensor("op_50_begin_0"), val = tensor([0, 0, 0, 64])]; + tensor var_50_end_0 = const()[name = tensor("op_50_end_0"), val = tensor([1, 1024, 1, 3072])]; + tensor var_50_end_mask_0 = const()[name = tensor("op_50_end_mask_0"), val = 
tensor([true, true, true, false])]; + tensor updated_v_cache = slice_by_index(begin = var_50_begin_0, end = var_50_end_0, end_mask = var_50_end_mask_0, x = cat_v_cast_fp16)[name = tensor("op_50_cast_fp16")]; + tensor var_51_promoted_to_fp16 = const()[name = tensor("op_51_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor prod_cast_fp16 = mul(x = updated_k_cache, y = var_51_promoted_to_fp16)[name = tensor("prod_cast_fp16")]; + tensor var_53_keep_dims_0 = const()[name = tensor("op_53_keep_dims_0"), val = tensor(false)]; + tensor ignore_me_im_only_here_so_this_runs_on_the_ane = reduce_min(keep_dims = var_53_keep_dims_0, x = prod_cast_fp16)[name = tensor("op_53_cast_fp16")]; + } -> (updated_k_cache, updated_v_cache, ignore_me_im_only_here_so_this_runs_on_the_ane); +} \ No newline at end of file diff --git a/logit-processor.mlmodelc/analytics/coremldata.bin b/logit-processor.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..b2696a90e0dfd3e31204d90ad67fc865f4886cbe --- /dev/null +++ b/logit-processor.mlmodelc/analytics/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cea8f79e82c95d93f772797047802fb88c7fc82dfcef790e69a2f274a104623 +size 243 diff --git a/logit-processor.mlmodelc/coremldata.bin b/logit-processor.mlmodelc/coremldata.bin new file mode 100644 index 0000000000000000000000000000000000000000..243bcbd5aeec1943b455bc39f3c486bac5ad9601 --- /dev/null +++ b/logit-processor.mlmodelc/coremldata.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d70f289c1552a24ba6ca721405ea0653ac7125a91297ea3d32b30363c2afd3c +size 503 diff --git a/logit-processor.mlmodelc/metadata.json b/logit-processor.mlmodelc/metadata.json new file mode 100644 index 0000000000000000000000000000000000000000..3caa579651bae1c29f067eda4f9c2fcf2dfb4c5a --- /dev/null +++ b/logit-processor.mlmodelc/metadata.json @@ -0,0 +1,130 @@ +[ + { + "metadataOutputVersion" : "3.0", + "outputSchema" : [ 
+ { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Int32", + "formattedType" : "MultiArray (Int32 1 × 64)", + "shortDescription" : "", + "shape" : "[1, 64]", + "name" : "argmax", + "type" : "MultiArray" + } + ], + "modelParameters" : [ + + ], + "specificationVersion" : 7, + "mlProgramOperationTypeHistogram" : { + "Ios16.add" : 7, + "Ios16.topk" : 9, + "Ios16.gatherAlongAxis" : 1, + "Concat" : 2, + "Squeeze" : 1 + }, + "computePrecision" : "Mixed (Float16, Int32)", + "isUpdatable" : "0", + "availability" : { + "macOS" : "13.0", + "tvOS" : "16.0", + "visionOS" : "1.0", + "watchOS" : "9.0", + "iOS" : "16.0", + "macCatalyst" : "16.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "userDefinedMetadata" : { + "com.github.apple.coremltools.source_dialect" : "TorchScript", + "com.github.apple.coremltools.source" : "torch==2.1.0", + "com.github.apple.coremltools.version" : "8.0b1" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_0", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_1", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_2", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_3", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + 
"isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_4", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_5", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 16384)", + "shortDescription" : "", + "shape" : "[1, 64, 16384]", + "name" : "logits_6", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float16", + "formattedType" : "MultiArray (Float16 1 × 64 × 13568)", + "shortDescription" : "", + "shape" : "[1, 64, 13568]", + "name" : "logits_7", + "type" : "MultiArray" + } + ], + "generatedClassName" : "split_logit_processor", + "method" : "predict" + } +] \ No newline at end of file diff --git a/logit-processor.mlmodelc/model.mil b/logit-processor.mlmodelc/model.mil new file mode 100644 index 0000000000000000000000000000000000000000..17fc7abf6fab73f2ad1cb8147b0931606300c73a --- /dev/null +++ b/logit-processor.mlmodelc/model.mil @@ -0,0 +1,84 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor logits_0, tensor logits_1, tensor logits_2, tensor logits_3, tensor logits_4, tensor logits_5, tensor logits_6, tensor logits_7) { + tensor chunk_size = const()[name = tensor("chunk_size"), val = tensor([16384])]; + tensor var_12 = const()[name = tensor("op_12"), val = tensor(1)]; + tensor var_16_axis_0 = const()[name = tensor("op_16_axis_0"), val = tensor(-1)]; + tensor 
var_16_ascending_0 = const()[name = tensor("op_16_ascending_0"), val = tensor(false)]; + tensor var_16_sort_0 = const()[name = tensor("op_16_sort_0"), val = tensor(false)]; + tensor var_16_return_indices_0 = const()[name = tensor("op_16_return_indices_0"), val = tensor(true)]; + tensor var_16_cast_fp16_0, tensor var_16_cast_fp16_1 = topk(ascending = var_16_ascending_0, axis = var_16_axis_0, k = var_12, return_indices = var_16_return_indices_0, sort = var_16_sort_0, x = logits_0)[name = tensor("op_16_cast_fp16")]; + tensor var_22 = const()[name = tensor("op_22"), val = tensor(1)]; + tensor var_26_axis_0 = const()[name = tensor("op_26_axis_0"), val = tensor(-1)]; + tensor var_26_ascending_0 = const()[name = tensor("op_26_ascending_0"), val = tensor(false)]; + tensor var_26_sort_0 = const()[name = tensor("op_26_sort_0"), val = tensor(false)]; + tensor var_26_return_indices_0 = const()[name = tensor("op_26_return_indices_0"), val = tensor(true)]; + tensor var_26_cast_fp16_0, tensor var_26_cast_fp16_1 = topk(ascending = var_26_ascending_0, axis = var_26_axis_0, k = var_22, return_indices = var_26_return_indices_0, sort = var_26_sort_0, x = logits_1)[name = tensor("op_26_cast_fp16")]; + tensor var_31 = add(x = var_26_cast_fp16_1, y = chunk_size)[name = tensor("op_31")]; + tensor var_32 = const()[name = tensor("op_32"), val = tensor(1)]; + tensor var_36_axis_0 = const()[name = tensor("op_36_axis_0"), val = tensor(-1)]; + tensor var_36_ascending_0 = const()[name = tensor("op_36_ascending_0"), val = tensor(false)]; + tensor var_36_sort_0 = const()[name = tensor("op_36_sort_0"), val = tensor(false)]; + tensor var_36_return_indices_0 = const()[name = tensor("op_36_return_indices_0"), val = tensor(true)]; + tensor var_36_cast_fp16_0, tensor var_36_cast_fp16_1 = topk(ascending = var_36_ascending_0, axis = var_36_axis_0, k = var_32, return_indices = var_36_return_indices_0, sort = var_36_sort_0, x = logits_2)[name = tensor("op_36_cast_fp16")]; + tensor var_39 = const()[name = 
tensor("op_39"), val = tensor([32768])]; + tensor var_41 = add(x = var_36_cast_fp16_1, y = var_39)[name = tensor("op_41")]; + tensor var_42 = const()[name = tensor("op_42"), val = tensor(1)]; + tensor var_46_axis_0 = const()[name = tensor("op_46_axis_0"), val = tensor(-1)]; + tensor var_46_ascending_0 = const()[name = tensor("op_46_ascending_0"), val = tensor(false)]; + tensor var_46_sort_0 = const()[name = tensor("op_46_sort_0"), val = tensor(false)]; + tensor var_46_return_indices_0 = const()[name = tensor("op_46_return_indices_0"), val = tensor(true)]; + tensor var_46_cast_fp16_0, tensor var_46_cast_fp16_1 = topk(ascending = var_46_ascending_0, axis = var_46_axis_0, k = var_42, return_indices = var_46_return_indices_0, sort = var_46_sort_0, x = logits_3)[name = tensor("op_46_cast_fp16")]; + tensor var_49 = const()[name = tensor("op_49"), val = tensor([49152])]; + tensor var_51 = add(x = var_46_cast_fp16_1, y = var_49)[name = tensor("op_51")]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_56_axis_0 = const()[name = tensor("op_56_axis_0"), val = tensor(-1)]; + tensor var_56_ascending_0 = const()[name = tensor("op_56_ascending_0"), val = tensor(false)]; + tensor var_56_sort_0 = const()[name = tensor("op_56_sort_0"), val = tensor(false)]; + tensor var_56_return_indices_0 = const()[name = tensor("op_56_return_indices_0"), val = tensor(true)]; + tensor var_56_cast_fp16_0, tensor var_56_cast_fp16_1 = topk(ascending = var_56_ascending_0, axis = var_56_axis_0, k = var_52, return_indices = var_56_return_indices_0, sort = var_56_sort_0, x = logits_4)[name = tensor("op_56_cast_fp16")]; + tensor var_59 = const()[name = tensor("op_59"), val = tensor([65536])]; + tensor var_61 = add(x = var_56_cast_fp16_1, y = var_59)[name = tensor("op_61")]; + tensor var_62 = const()[name = tensor("op_62"), val = tensor(1)]; + tensor var_66_axis_0 = const()[name = tensor("op_66_axis_0"), val = tensor(-1)]; + tensor var_66_ascending_0 = const()[name = 
tensor("op_66_ascending_0"), val = tensor(false)]; + tensor var_66_sort_0 = const()[name = tensor("op_66_sort_0"), val = tensor(false)]; + tensor var_66_return_indices_0 = const()[name = tensor("op_66_return_indices_0"), val = tensor(true)]; + tensor var_66_cast_fp16_0, tensor var_66_cast_fp16_1 = topk(ascending = var_66_ascending_0, axis = var_66_axis_0, k = var_62, return_indices = var_66_return_indices_0, sort = var_66_sort_0, x = logits_5)[name = tensor("op_66_cast_fp16")]; + tensor var_69 = const()[name = tensor("op_69"), val = tensor([81920])]; + tensor var_71 = add(x = var_66_cast_fp16_1, y = var_69)[name = tensor("op_71")]; + tensor var_72 = const()[name = tensor("op_72"), val = tensor(1)]; + tensor var_76_axis_0 = const()[name = tensor("op_76_axis_0"), val = tensor(-1)]; + tensor var_76_ascending_0 = const()[name = tensor("op_76_ascending_0"), val = tensor(false)]; + tensor var_76_sort_0 = const()[name = tensor("op_76_sort_0"), val = tensor(false)]; + tensor var_76_return_indices_0 = const()[name = tensor("op_76_return_indices_0"), val = tensor(true)]; + tensor var_76_cast_fp16_0, tensor var_76_cast_fp16_1 = topk(ascending = var_76_ascending_0, axis = var_76_axis_0, k = var_72, return_indices = var_76_return_indices_0, sort = var_76_sort_0, x = logits_6)[name = tensor("op_76_cast_fp16")]; + tensor var_79 = const()[name = tensor("op_79"), val = tensor([98304])]; + tensor var_81 = add(x = var_76_cast_fp16_1, y = var_79)[name = tensor("op_81")]; + tensor var_82 = const()[name = tensor("op_82"), val = tensor(1)]; + tensor cv_axis_0 = const()[name = tensor("cv_axis_0"), val = tensor(-1)]; + tensor cv_ascending_0 = const()[name = tensor("cv_ascending_0"), val = tensor(false)]; + tensor cv_sort_0 = const()[name = tensor("cv_sort_0"), val = tensor(false)]; + tensor cv_return_indices_0 = const()[name = tensor("cv_return_indices_0"), val = tensor(true)]; + tensor cv_cast_fp16_0, tensor cv_cast_fp16_1 = topk(ascending = cv_ascending_0, axis = cv_axis_0, k = var_82, 
return_indices = cv_return_indices_0, sort = cv_sort_0, x = logits_7)[name = tensor("cv_cast_fp16")]; + tensor var_89 = const()[name = tensor("op_89"), val = tensor([114688])]; + tensor var_91 = add(x = cv_cast_fp16_1, y = var_89)[name = tensor("op_91")]; + tensor var_93 = const()[name = tensor("op_93"), val = tensor(-1)]; + tensor values_interleave_0 = const()[name = tensor("values_interleave_0"), val = tensor(false)]; + tensor values_cast_fp16 = concat(axis = var_93, interleave = values_interleave_0, values = (var_16_cast_fp16_0, var_26_cast_fp16_0, var_36_cast_fp16_0, var_46_cast_fp16_0, var_56_cast_fp16_0, var_66_cast_fp16_0, var_76_cast_fp16_0, cv_cast_fp16_0))[name = tensor("values_cast_fp16")]; + tensor var_96 = const()[name = tensor("op_96"), val = tensor(-1)]; + tensor indices_interleave_0 = const()[name = tensor("indices_interleave_0"), val = tensor(false)]; + tensor indices = concat(axis = var_96, interleave = indices_interleave_0, values = (var_16_cast_fp16_1, var_31, var_41, var_51, var_61, var_71, var_81, var_91))[name = tensor("indices")]; + tensor var_98 = const()[name = tensor("op_98"), val = tensor(1)]; + tensor var_102_axis_0 = const()[name = tensor("op_102_axis_0"), val = tensor(-1)]; + tensor var_102_ascending_0 = const()[name = tensor("op_102_ascending_0"), val = tensor(false)]; + tensor var_102_sort_0 = const()[name = tensor("op_102_sort_0"), val = tensor(true)]; + tensor var_102_return_indices_0 = const()[name = tensor("op_102_return_indices_0"), val = tensor(true)]; + tensor var_102_cast_fp16_0, tensor var_102_cast_fp16_1 = topk(ascending = var_102_ascending_0, axis = var_102_axis_0, k = var_98, return_indices = var_102_return_indices_0, sort = var_102_sort_0, x = values_cast_fp16)[name = tensor("op_102_cast_fp16")]; + tensor var_104 = const()[name = tensor("op_104"), val = tensor(-1)]; + tensor var_106 = gather_along_axis(axis = var_104, indices = var_102_cast_fp16_1, x = indices)[name = tensor("op_106")]; + tensor var_108_axes_0 = 
const()[name = tensor("op_108_axes_0"), val = tensor([-1])]; + tensor argmax = squeeze(axes = var_108_axes_0, x = var_106)[name = tensor("op_108")]; + } -> (argmax); +} \ No newline at end of file