diff --git "a/Llama-3.2-3B-Instruct_chunk2.mlmodelc/model.mil" "b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/model.mil" new file mode 100644--- /dev/null +++ "b/Llama-3.2-3B-Instruct_chunk2.mlmodelc/model.mil" @@ -0,0 +1,956 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3304.5.2"}, {"coremlc-version", "3304.6.2"}, {"coremltools-component-torch", "2.1.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "8.0b1"}})] +{ + func main(tensor cos, tensor k_cache_0, tensor k_cache_1, tensor mask, tensor sin, tensor v_cache_0, tensor v_cache_1, tensor x) [CoreML_InputDefaultValues = dict, tensor>({{"k_cache_0", 0}, {"k_cache_1", 0}, {"v_cache_0", 0}, {"v_cache_1", 0}})] { + tensor var_13 = const()[name = tensor("op_13"), val = tensor(-1)]; + tensor var_17 = const()[name = tensor("op_17"), val = tensor(-2)]; + tensor var_19 = const()[name = tensor("op_19"), val = tensor(-3)]; + tensor var_52 = const()[name = tensor("op_52"), val = tensor(1)]; + tensor var_55 = const()[name = tensor("op_55"), val = tensor(true)]; + tensor x_eps_1_interleave_0 = const()[name = tensor("x_eps_1_interleave_0"), val = tensor(false)]; + tensor eps_chan_1_to_fp16 = const()[name = tensor("eps_chan_1_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor x_eps_1_cast_fp16 = concat(axis = var_52, interleave = x_eps_1_interleave_0, values = (x, eps_chan_1_to_fp16))[name = tensor("x_eps_1_cast_fp16")]; + tensor norm_x_1_axes_0 = const()[name = tensor("norm_x_1_axes_0"), val = tensor([1])]; + tensor norm_x_1_cast_fp16 = reduce_l2_norm(axes = norm_x_1_axes_0, keep_dims = var_55, x = x_eps_1_cast_fp16)[name = tensor("norm_x_1_cast_fp16")]; + tensor x_normed_1_cast_fp16 = real_div(x = x, y = norm_x_1_cast_fp16)[name = tensor("x_normed_1_cast_fp16")]; + tensor var_79_to_fp16 = const()[name = tensor("op_79_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_3_cast_fp16 = mul(x = x_normed_1_cast_fp16, y = var_79_to_fp16)[name = tensor("x_normed_3_cast_fp16")]; + tensor blocks_0_norm_1_weight_to_fp16 = const()[name = tensor("blocks_0_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(256)))]; + tensor x_5_cast_fp16 = mul(x = x_normed_3_cast_fp16, y = blocks_0_norm_1_weight_to_fp16)[name = tensor("x_5_cast_fp16")]; + tensor var_100 = const()[name = tensor("op_100"), val = tensor([1, 3072, 1, -1])]; + tensor input_1_cast_fp16 = reshape(shape = var_100, x = x_5_cast_fp16)[name = tensor("input_1_cast_fp16")]; + tensor var_103 = const()[name = tensor("op_103"), val = tensor([1, 1])]; + tensor var_105 = const()[name = tensor("op_105"), val = tensor([1, 1])]; + tensor q_1_pad_type_0 = const()[name = tensor("q_1_pad_type_0"), val = tensor("custom")]; + tensor q_1_pad_0 = const()[name = tensor("q_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(6464)))]; + tensor q_1_cast_fp16 = conv(dilations = var_105, groups = var_52, pad = q_1_pad_0, pad_type = q_1_pad_type_0, strides = var_103, weight = blocks_0_attn_q_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("q_1_cast_fp16")]; + tensor var_109 = const()[name = tensor("op_109"), val = tensor([1, 1])]; + tensor var_111 = const()[name = tensor("op_111"), val = tensor([1, 1])]; + tensor k_1_pad_type_0 = const()[name = tensor("k_1_pad_type_0"), val = tensor("custom")]; + tensor k_1_pad_0 = const()[name = tensor("k_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(18880896)))]; + tensor k_1_cast_fp16 = conv(dilations = var_111, groups = var_52, pad = k_1_pad_0, pad_type = k_1_pad_type_0, strides = var_109, weight = blocks_0_attn_k_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("k_1_cast_fp16")]; + tensor var_115 = const()[name = tensor("op_115"), val = tensor([1, 1])]; + tensor var_117 = const()[name = tensor("op_117"), val = tensor([1, 1])]; + tensor v_1_pad_type_0 = const()[name = tensor("v_1_pad_type_0"), val = tensor("custom")]; + tensor v_1_pad_0 = const()[name = tensor("v_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(25172416)))]; + tensor v_1_cast_fp16 = conv(dilations = var_117, groups = var_52, pad = v_1_pad_0, pad_type = v_1_pad_type_0, strides = var_115, weight = blocks_0_attn_v_proj_weight_to_fp16, x = input_1_cast_fp16)[name = tensor("v_1_cast_fp16")]; + tensor var_120 = const()[name = tensor("op_120"), val = tensor([1, 24, 128, 64])]; + tensor q_3_cast_fp16 = reshape(shape = var_120, x = q_1_cast_fp16)[name = tensor("q_3_cast_fp16")]; + tensor var_122 = const()[name = tensor("op_122"), val = tensor([1, -1, 128, 64])]; + tensor k_3_cast_fp16 = reshape(shape = var_122, x = k_1_cast_fp16)[name = tensor("k_3_cast_fp16")]; + tensor var_136_begin_0 = const()[name = tensor("op_136_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_136_end_0 = const()[name = tensor("op_136_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_136_end_mask_0 = const()[name = tensor("op_136_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_136_cast_fp16 = slice_by_index(begin = var_136_begin_0, end = var_136_end_0, end_mask = var_136_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_136_cast_fp16")]; + tensor var_142_begin_0 = const()[name = tensor("op_142_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_142_end_0 = const()[name = tensor("op_142_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_142_end_mask_0 = const()[name = tensor("op_142_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_142_cast_fp16 = slice_by_index(begin = var_142_begin_0, end = var_142_end_0, end_mask = var_142_end_mask_0, x = q_3_cast_fp16)[name = tensor("op_142_cast_fp16")]; + tensor const_10_promoted_to_fp16 = const()[name = tensor("const_10_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_144_cast_fp16 = mul(x = var_142_cast_fp16, y = const_10_promoted_to_fp16)[name = tensor("op_144_cast_fp16")]; + tensor rotated_1_interleave_0 = const()[name = tensor("rotated_1_interleave_0"), val = tensor(false)]; + tensor rotated_1_cast_fp16 = concat(axis = var_17, interleave = rotated_1_interleave_0, values = (var_144_cast_fp16, var_136_cast_fp16))[name = tensor("rotated_1_cast_fp16")]; + tensor var_147_cast_fp16 = mul(x = q_3_cast_fp16, y = cos)[name = tensor("op_147_cast_fp16")]; + tensor var_148_cast_fp16 = mul(x = rotated_1_cast_fp16, y = sin)[name = tensor("op_148_cast_fp16")]; + tensor roped_1_cast_fp16 = add(x = var_147_cast_fp16, y = var_148_cast_fp16)[name = tensor("roped_1_cast_fp16")]; + tensor var_161_begin_0 = const()[name = tensor("op_161_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_161_end_0 = const()[name = tensor("op_161_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_161_end_mask_0 = const()[name = tensor("op_161_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_161_cast_fp16 = slice_by_index(begin = var_161_begin_0, end = var_161_end_0, end_mask = var_161_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_161_cast_fp16")]; + tensor var_167_begin_0 = const()[name = tensor("op_167_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_167_end_0 = const()[name = tensor("op_167_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_167_end_mask_0 = const()[name = tensor("op_167_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_167_cast_fp16 = slice_by_index(begin = var_167_begin_0, end = var_167_end_0, end_mask = var_167_end_mask_0, x = k_3_cast_fp16)[name = tensor("op_167_cast_fp16")]; + tensor const_12_promoted_to_fp16 = const()[name = tensor("const_12_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_169_cast_fp16 = mul(x = var_167_cast_fp16, y = const_12_promoted_to_fp16)[name = tensor("op_169_cast_fp16")]; + tensor rotated_3_interleave_0 = const()[name = tensor("rotated_3_interleave_0"), val = tensor(false)]; + tensor rotated_3_cast_fp16 = concat(axis = var_17, interleave = rotated_3_interleave_0, values = (var_169_cast_fp16, var_161_cast_fp16))[name = tensor("rotated_3_cast_fp16")]; + tensor var_172_cast_fp16 = mul(x = k_3_cast_fp16, y = cos)[name = tensor("op_172_cast_fp16")]; + tensor var_173_cast_fp16 = mul(x = rotated_3_cast_fp16, y = sin)[name = tensor("op_173_cast_fp16")]; + tensor roped_3_cast_fp16 = add(x = var_172_cast_fp16, y = var_173_cast_fp16)[name = tensor("roped_3_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor([1, -1, 1, 64])]; + tensor k_7_cast_fp16 = reshape(shape = var_176, x = roped_3_cast_fp16)[name = tensor("k_7_cast_fp16")]; + tensor var_178 = const()[name = tensor("op_178"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_0 = reshape(shape = var_178, x = v_1_cast_fp16)[name = tensor("new_v_cache_0_type_fp32_cast_fp16")]; + tensor k_9_perm_0 = const()[name = tensor("k_9_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_11_interleave_0 = const()[name = tensor("k_11_interleave_0"), val = tensor(false)]; + tensor new_k_cache_0 = transpose(perm = k_9_perm_0, x = k_7_cast_fp16)[name = tensor("transpose_1")]; + tensor k_11_cast_fp16 = concat(axis = var_19, interleave = k_11_interleave_0, values = (k_cache_0, new_k_cache_0))[name = tensor("k_11_cast_fp16")]; + tensor v_7_interleave_0 = const()[name = tensor("v_7_interleave_0"), val = tensor(false)]; + tensor v_7_cast_fp16 = concat(axis = var_13, interleave = v_7_interleave_0, values = (v_cache_0, new_v_cache_0))[name = tensor("v_7_cast_fp16")]; + tensor var_186 = const()[name = tensor("op_186"), val = tensor([1, 3072, 1, -1])]; + tensor q_7_cast_fp16 = reshape(shape = var_186, x = roped_1_cast_fp16)[name = tensor("q_7_cast_fp16")]; + tensor var_191_begin_0 = const()[name = tensor("op_191_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_191_end_0 = const()[name = tensor("op_191_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_191_end_mask_0 = const()[name = tensor("op_191_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_191_cast_fp16 = slice_by_index(begin = var_191_begin_0, end = var_191_end_0, end_mask = var_191_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_191_cast_fp16")]; + tensor var_195_begin_0 = const()[name = tensor("op_195_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_195_end_0 = const()[name = tensor("op_195_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_195_end_mask_0 = const()[name = tensor("op_195_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_195_cast_fp16 = slice_by_index(begin = var_195_begin_0, end = var_195_end_0, end_mask = var_195_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_195_cast_fp16")]; + tensor var_199_begin_0 = const()[name = tensor("op_199_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_199_end_0 = const()[name = tensor("op_199_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_199_end_mask_0 = const()[name = tensor("op_199_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_199_cast_fp16 = slice_by_index(begin = var_199_begin_0, end = var_199_end_0, end_mask = var_199_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_199_cast_fp16")]; + tensor var_203_begin_0 = const()[name = tensor("op_203_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_203_end_0 = const()[name = tensor("op_203_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_203_end_mask_0 = const()[name = tensor("op_203_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_203_cast_fp16 = slice_by_index(begin = var_203_begin_0, end = var_203_end_0, end_mask = var_203_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_203_cast_fp16")]; + tensor var_207_begin_0 = const()[name = tensor("op_207_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_207_end_0 = const()[name = tensor("op_207_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_207_end_mask_0 = const()[name = tensor("op_207_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_207_cast_fp16 = slice_by_index(begin = var_207_begin_0, end = var_207_end_0, end_mask = var_207_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_207_cast_fp16")]; + tensor var_211_begin_0 = const()[name = tensor("op_211_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_211_end_0 = const()[name = tensor("op_211_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_211_end_mask_0 = const()[name = tensor("op_211_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_211_cast_fp16 = slice_by_index(begin = var_211_begin_0, end = var_211_end_0, end_mask = var_211_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_211_cast_fp16")]; + tensor var_215_begin_0 = const()[name = tensor("op_215_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_215_end_0 = const()[name = tensor("op_215_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_215_end_mask_0 = const()[name = tensor("op_215_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_215_cast_fp16 = slice_by_index(begin = var_215_begin_0, end = var_215_end_0, end_mask = var_215_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_215_cast_fp16")]; + tensor var_219_begin_0 = const()[name = tensor("op_219_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_219_end_0 = const()[name = tensor("op_219_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_219_end_mask_0 = const()[name = tensor("op_219_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_219_cast_fp16 = slice_by_index(begin = var_219_begin_0, end = var_219_end_0, end_mask = var_219_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_219_cast_fp16")]; + tensor var_223_begin_0 = const()[name = tensor("op_223_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_223_end_0 = const()[name = tensor("op_223_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_223_end_mask_0 = const()[name = tensor("op_223_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_223_cast_fp16 = slice_by_index(begin = var_223_begin_0, end = var_223_end_0, end_mask = var_223_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_223_cast_fp16")]; + tensor var_227_begin_0 = const()[name = tensor("op_227_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_227_end_0 = const()[name = tensor("op_227_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_227_end_mask_0 = const()[name = tensor("op_227_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_227_cast_fp16 = slice_by_index(begin = var_227_begin_0, end = var_227_end_0, end_mask = var_227_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_231_begin_0 = const()[name = tensor("op_231_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_231_end_0 = const()[name = tensor("op_231_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_231_end_mask_0 = const()[name = tensor("op_231_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_231_cast_fp16 = slice_by_index(begin = var_231_begin_0, end = var_231_end_0, end_mask = var_231_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_231_cast_fp16")]; + tensor var_235_begin_0 = const()[name = tensor("op_235_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_235_end_0 = const()[name = tensor("op_235_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_235_end_mask_0 = const()[name = tensor("op_235_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_235_cast_fp16 = slice_by_index(begin = var_235_begin_0, end = var_235_end_0, end_mask = var_235_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_235_cast_fp16")]; + tensor var_239_begin_0 = const()[name = tensor("op_239_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_239_end_0 = const()[name = tensor("op_239_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_239_end_mask_0 = const()[name = tensor("op_239_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_239_cast_fp16 = slice_by_index(begin = var_239_begin_0, end = var_239_end_0, end_mask = var_239_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_239_cast_fp16")]; + tensor var_243_begin_0 = const()[name = tensor("op_243_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_243_end_0 = const()[name = tensor("op_243_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_243_end_mask_0 = const()[name = tensor("op_243_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_243_cast_fp16 = slice_by_index(begin = var_243_begin_0, end = var_243_end_0, end_mask = var_243_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_243_cast_fp16")]; + tensor var_247_begin_0 = const()[name = tensor("op_247_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_247_end_0 = const()[name = tensor("op_247_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_247_end_mask_0 = const()[name = tensor("op_247_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_247_cast_fp16 = slice_by_index(begin = var_247_begin_0, end = var_247_end_0, end_mask = var_247_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_247_cast_fp16")]; + tensor var_251_begin_0 = const()[name = tensor("op_251_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_251_end_0 = const()[name = tensor("op_251_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_251_end_mask_0 = const()[name = tensor("op_251_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_251_cast_fp16 = slice_by_index(begin = var_251_begin_0, end = var_251_end_0, end_mask = var_251_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_251_cast_fp16")]; + tensor var_255_begin_0 = const()[name = tensor("op_255_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_255_end_0 = const()[name = tensor("op_255_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_255_end_mask_0 = const()[name = tensor("op_255_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_255_cast_fp16 = slice_by_index(begin = var_255_begin_0, end = var_255_end_0, end_mask = var_255_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_255_cast_fp16")]; + tensor var_259_begin_0 = const()[name = tensor("op_259_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_259_end_0 = const()[name = tensor("op_259_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_259_end_mask_0 = const()[name = tensor("op_259_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_259_cast_fp16 = slice_by_index(begin = var_259_begin_0, end = var_259_end_0, end_mask = var_259_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_259_cast_fp16")]; + tensor var_263_begin_0 = const()[name = tensor("op_263_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_263_end_0 = const()[name = tensor("op_263_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_263_end_mask_0 = const()[name = tensor("op_263_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_263_cast_fp16 = slice_by_index(begin = var_263_begin_0, end = var_263_end_0, end_mask = var_263_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_263_cast_fp16")]; + tensor var_267_begin_0 = const()[name = tensor("op_267_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_267_end_0 = const()[name = tensor("op_267_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_267_end_mask_0 = const()[name = tensor("op_267_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_267_cast_fp16 = slice_by_index(begin = var_267_begin_0, end = var_267_end_0, end_mask = var_267_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_267_cast_fp16")]; + tensor var_271_begin_0 = const()[name = tensor("op_271_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_271_end_0 = const()[name = tensor("op_271_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_271_end_mask_0 = const()[name = tensor("op_271_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_271_cast_fp16 = slice_by_index(begin = var_271_begin_0, end = var_271_end_0, end_mask = var_271_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_271_cast_fp16")]; + tensor var_275_begin_0 = const()[name = tensor("op_275_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_275_end_0 = const()[name = tensor("op_275_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_275_end_mask_0 = const()[name = tensor("op_275_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_275_cast_fp16 = slice_by_index(begin = var_275_begin_0, end = var_275_end_0, end_mask = var_275_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_275_cast_fp16")]; + tensor var_279_begin_0 = const()[name = tensor("op_279_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_279_end_0 = const()[name = tensor("op_279_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_279_end_mask_0 = const()[name = tensor("op_279_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_279_cast_fp16 = slice_by_index(begin = var_279_begin_0, end = var_279_end_0, end_mask = var_279_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_279_cast_fp16")]; + tensor var_283_begin_0 = const()[name = tensor("op_283_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_283_end_0 = const()[name = tensor("op_283_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_283_end_mask_0 = const()[name = tensor("op_283_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_283_cast_fp16 = slice_by_index(begin = var_283_begin_0, end = var_283_end_0, end_mask = var_283_end_mask_0, x = q_7_cast_fp16)[name = tensor("op_283_cast_fp16")]; + tensor var_289_begin_0 = const()[name = tensor("op_289_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_289_end_0 = const()[name = tensor("op_289_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_289_end_mask_0 = const()[name = tensor("op_289_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_289_cast_fp16 = slice_by_index(begin = var_289_begin_0, end = var_289_end_0, end_mask = var_289_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_289_cast_fp16")]; + tensor var_301_begin_0 = const()[name = tensor("op_301_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_301_end_0 = const()[name = tensor("op_301_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_301_end_mask_0 = const()[name = tensor("op_301_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_301_cast_fp16 = slice_by_index(begin = var_301_begin_0, end = var_301_end_0, end_mask = var_301_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_301_cast_fp16")]; + tensor var_313_begin_0 = const()[name = tensor("op_313_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_313_end_0 = const()[name = tensor("op_313_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_313_end_mask_0 = const()[name = tensor("op_313_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_313_cast_fp16 = slice_by_index(begin = var_313_begin_0, end = var_313_end_0, end_mask = var_313_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_313_cast_fp16")]; + tensor var_325_begin_0 = const()[name = tensor("op_325_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_325_end_0 = const()[name = tensor("op_325_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_325_end_mask_0 = const()[name = tensor("op_325_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_325_cast_fp16 = slice_by_index(begin = var_325_begin_0, end = var_325_end_0, end_mask = var_325_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_325_cast_fp16")]; + tensor var_337_begin_0 = const()[name = tensor("op_337_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_337_end_0 = const()[name = tensor("op_337_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_337_end_mask_0 = const()[name = tensor("op_337_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_337_cast_fp16 = slice_by_index(begin = var_337_begin_0, end = var_337_end_0, end_mask = var_337_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_337_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_361_begin_0 = const()[name = tensor("op_361_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_361_end_0 = const()[name = tensor("op_361_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_361_end_mask_0 = const()[name = tensor("op_361_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_361_cast_fp16 = slice_by_index(begin = var_361_begin_0, end = var_361_end_0, end_mask = var_361_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_361_cast_fp16")]; + tensor var_373_begin_0 = const()[name = tensor("op_373_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_373_end_0 = const()[name = tensor("op_373_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_373_end_mask_0 = const()[name = tensor("op_373_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_373_cast_fp16 = slice_by_index(begin = var_373_begin_0, end = var_373_end_0, end_mask = var_373_end_mask_0, x = k_11_cast_fp16)[name = tensor("op_373_cast_fp16")]; + tensor var_383_begin_0 = const()[name = tensor("op_383_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_383_end_0 = const()[name = tensor("op_383_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_383_end_mask_0 = const()[name = tensor("op_383_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_383_cast_fp16 = slice_by_index(begin = var_383_begin_0, end = var_383_end_0, end_mask = var_383_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_383_cast_fp16")]; + tensor var_395_begin_0 = const()[name = tensor("op_395_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_395_end_0 = const()[name = tensor("op_395_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_395_end_mask_0 = const()[name = tensor("op_395_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_395_cast_fp16 = slice_by_index(begin = var_395_begin_0, end = var_395_end_0, end_mask = var_395_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_395_cast_fp16")]; + tensor var_407_begin_0 = const()[name = tensor("op_407_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_407_end_0 = const()[name = tensor("op_407_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_407_end_mask_0 = const()[name = tensor("op_407_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_407_cast_fp16 = slice_by_index(begin = var_407_begin_0, end = var_407_end_0, end_mask = var_407_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_407_cast_fp16")]; + tensor var_419_begin_0 = const()[name = tensor("op_419_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_419_end_0 = const()[name = tensor("op_419_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_419_end_mask_0 = const()[name = tensor("op_419_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_419_cast_fp16 = slice_by_index(begin = var_419_begin_0, end = var_419_end_0, end_mask = var_419_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_419_cast_fp16")]; + tensor var_431_begin_0 = const()[name = tensor("op_431_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_431_end_0 = const()[name = tensor("op_431_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_431_end_mask_0 = const()[name = tensor("op_431_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_431_cast_fp16 = slice_by_index(begin = var_431_begin_0, end = var_431_end_0, end_mask = var_431_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_431_cast_fp16")]; + tensor var_443_begin_0 = const()[name = tensor("op_443_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_443_end_0 = const()[name = tensor("op_443_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_443_end_mask_0 = const()[name = tensor("op_443_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_443_cast_fp16 = slice_by_index(begin = var_443_begin_0, end = var_443_end_0, end_mask = var_443_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_443_cast_fp16")]; + tensor var_455_begin_0 = const()[name = tensor("op_455_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_455_end_0 = const()[name = tensor("op_455_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_455_end_mask_0 = const()[name = tensor("op_455_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_455_cast_fp16 = slice_by_index(begin = var_455_begin_0, end = var_455_end_0, end_mask = var_455_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_455_cast_fp16")]; + tensor var_467_begin_0 = const()[name = tensor("op_467_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_467_end_0 = const()[name = tensor("op_467_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_467_end_mask_0 = const()[name = tensor("op_467_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_467_cast_fp16 = slice_by_index(begin = var_467_begin_0, end = var_467_end_0, end_mask = var_467_end_mask_0, x = v_7_cast_fp16)[name = tensor("op_467_cast_fp16")]; + tensor var_479_equation_0 = const()[name = tensor("op_479_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_479_cast_fp16 = einsum(equation = var_479_equation_0, values = (var_289_cast_fp16, var_191_cast_fp16))[name = tensor("op_479_cast_fp16")]; + tensor var_480_to_fp16 = const()[name = tensor("op_480_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_481_cast_fp16 = mul(x = var_479_cast_fp16, y = var_480_to_fp16)[name = tensor("op_481_cast_fp16")]; + tensor var_483_equation_0 = const()[name = tensor("op_483_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_483_cast_fp16 = einsum(equation = var_483_equation_0, values = (var_289_cast_fp16, var_195_cast_fp16))[name = tensor("op_483_cast_fp16")]; + tensor var_484_to_fp16 = const()[name = tensor("op_484_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_485_cast_fp16 = mul(x = var_483_cast_fp16, y = var_484_to_fp16)[name = tensor("op_485_cast_fp16")]; + tensor var_487_equation_0 = const()[name = tensor("op_487_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_487_cast_fp16 = einsum(equation = var_487_equation_0, values = (var_289_cast_fp16, var_199_cast_fp16))[name = tensor("op_487_cast_fp16")]; + tensor var_488_to_fp16 = const()[name = tensor("op_488_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_489_cast_fp16 = mul(x = var_487_cast_fp16, y = var_488_to_fp16)[name = tensor("op_489_cast_fp16")]; + tensor var_491_equation_0 = const()[name = tensor("op_491_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_491_cast_fp16 = einsum(equation = var_491_equation_0, values = (var_301_cast_fp16, var_203_cast_fp16))[name = tensor("op_491_cast_fp16")]; + tensor var_492_to_fp16 = const()[name = tensor("op_492_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_493_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor("op_493_cast_fp16")]; + tensor var_495_equation_0 = const()[name = tensor("op_495_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_495_cast_fp16 = einsum(equation = var_495_equation_0, values = (var_301_cast_fp16, var_207_cast_fp16))[name = tensor("op_495_cast_fp16")]; + tensor var_496_to_fp16 = const()[name = tensor("op_496_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_497_cast_fp16 = mul(x = var_495_cast_fp16, y = var_496_to_fp16)[name = tensor("op_497_cast_fp16")]; + tensor var_499_equation_0 = const()[name = tensor("op_499_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_499_cast_fp16 = einsum(equation = var_499_equation_0, values = (var_301_cast_fp16, var_211_cast_fp16))[name = tensor("op_499_cast_fp16")]; + tensor var_500_to_fp16 = const()[name = tensor("op_500_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_501_cast_fp16 = mul(x = var_499_cast_fp16, y = var_500_to_fp16)[name = tensor("op_501_cast_fp16")]; + tensor var_503_equation_0 = const()[name = tensor("op_503_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_503_cast_fp16 = einsum(equation = var_503_equation_0, values = (var_313_cast_fp16, var_215_cast_fp16))[name = tensor("op_503_cast_fp16")]; + tensor var_504_to_fp16 = const()[name = tensor("op_504_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_505_cast_fp16 = mul(x = var_503_cast_fp16, y = var_504_to_fp16)[name = tensor("op_505_cast_fp16")]; + tensor var_507_equation_0 = const()[name = tensor("op_507_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_507_cast_fp16 = einsum(equation = var_507_equation_0, values = (var_313_cast_fp16, var_219_cast_fp16))[name = tensor("op_507_cast_fp16")]; + tensor var_508_to_fp16 = const()[name = tensor("op_508_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_509_cast_fp16 = mul(x = var_507_cast_fp16, y = var_508_to_fp16)[name = tensor("op_509_cast_fp16")]; + tensor var_511_equation_0 = const()[name = tensor("op_511_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_511_cast_fp16 = einsum(equation = var_511_equation_0, values = (var_313_cast_fp16, var_223_cast_fp16))[name = tensor("op_511_cast_fp16")]; + tensor var_512_to_fp16 = const()[name = tensor("op_512_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_513_cast_fp16 = mul(x = var_511_cast_fp16, y = var_512_to_fp16)[name = tensor("op_513_cast_fp16")]; + tensor var_515_equation_0 = const()[name = tensor("op_515_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_515_cast_fp16 = einsum(equation = var_515_equation_0, values = (var_325_cast_fp16, var_227_cast_fp16))[name = tensor("op_515_cast_fp16")]; + tensor var_516_to_fp16 = const()[name = tensor("op_516_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_517_cast_fp16 = mul(x = var_515_cast_fp16, y = var_516_to_fp16)[name = tensor("op_517_cast_fp16")]; + tensor var_519_equation_0 = const()[name = tensor("op_519_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_519_cast_fp16 = einsum(equation = var_519_equation_0, values = (var_325_cast_fp16, var_231_cast_fp16))[name = tensor("op_519_cast_fp16")]; + tensor var_520_to_fp16 = const()[name = tensor("op_520_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_521_cast_fp16 = mul(x = var_519_cast_fp16, y = var_520_to_fp16)[name = tensor("op_521_cast_fp16")]; + tensor var_523_equation_0 = const()[name = tensor("op_523_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_523_cast_fp16 = einsum(equation = var_523_equation_0, values = (var_325_cast_fp16, var_235_cast_fp16))[name = tensor("op_523_cast_fp16")]; + tensor var_524_to_fp16 = const()[name = tensor("op_524_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_525_cast_fp16 = mul(x = var_523_cast_fp16, y = var_524_to_fp16)[name = tensor("op_525_cast_fp16")]; + tensor var_527_equation_0 = const()[name = tensor("op_527_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_527_cast_fp16 = einsum(equation = var_527_equation_0, values = (var_337_cast_fp16, var_239_cast_fp16))[name = tensor("op_527_cast_fp16")]; + tensor var_528_to_fp16 = const()[name = tensor("op_528_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_529_cast_fp16 = mul(x = var_527_cast_fp16, y = var_528_to_fp16)[name = tensor("op_529_cast_fp16")]; + tensor var_531_equation_0 = const()[name = tensor("op_531_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_531_cast_fp16 = einsum(equation = var_531_equation_0, values = (var_337_cast_fp16, var_243_cast_fp16))[name = tensor("op_531_cast_fp16")]; + tensor var_532_to_fp16 = const()[name = tensor("op_532_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_533_cast_fp16 = mul(x = var_531_cast_fp16, y = var_532_to_fp16)[name = tensor("op_533_cast_fp16")]; + tensor var_535_equation_0 = const()[name = tensor("op_535_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_535_cast_fp16 = einsum(equation = var_535_equation_0, values = (var_337_cast_fp16, var_247_cast_fp16))[name = tensor("op_535_cast_fp16")]; + tensor var_536_to_fp16 = const()[name = tensor("op_536_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_537_cast_fp16 = mul(x = var_535_cast_fp16, y = var_536_to_fp16)[name = tensor("op_537_cast_fp16")]; + tensor var_539_equation_0 = const()[name = tensor("op_539_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_539_cast_fp16 = einsum(equation = var_539_equation_0, values = (var_349_cast_fp16, var_251_cast_fp16))[name = tensor("op_539_cast_fp16")]; + tensor var_540_to_fp16 = const()[name = tensor("op_540_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_541_cast_fp16 = mul(x = var_539_cast_fp16, y = var_540_to_fp16)[name = tensor("op_541_cast_fp16")]; + tensor var_543_equation_0 = const()[name = tensor("op_543_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_543_cast_fp16 = einsum(equation = var_543_equation_0, values = (var_349_cast_fp16, var_255_cast_fp16))[name = tensor("op_543_cast_fp16")]; + tensor var_544_to_fp16 = const()[name = tensor("op_544_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_545_cast_fp16 = mul(x = var_543_cast_fp16, y = var_544_to_fp16)[name = tensor("op_545_cast_fp16")]; + tensor var_547_equation_0 = const()[name = tensor("op_547_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_547_cast_fp16 = einsum(equation = var_547_equation_0, values = (var_349_cast_fp16, var_259_cast_fp16))[name = tensor("op_547_cast_fp16")]; + tensor var_548_to_fp16 = const()[name = tensor("op_548_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_549_cast_fp16 = mul(x = var_547_cast_fp16, y = var_548_to_fp16)[name = tensor("op_549_cast_fp16")]; + tensor var_551_equation_0 = const()[name = tensor("op_551_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_551_cast_fp16 = einsum(equation = var_551_equation_0, values = (var_361_cast_fp16, var_263_cast_fp16))[name = tensor("op_551_cast_fp16")]; + tensor var_552_to_fp16 = const()[name = tensor("op_552_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_553_cast_fp16 = mul(x = var_551_cast_fp16, y = var_552_to_fp16)[name = tensor("op_553_cast_fp16")]; + tensor var_555_equation_0 = const()[name = tensor("op_555_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_555_cast_fp16 = einsum(equation = var_555_equation_0, values = (var_361_cast_fp16, var_267_cast_fp16))[name = tensor("op_555_cast_fp16")]; + tensor var_556_to_fp16 = const()[name = tensor("op_556_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_557_cast_fp16 = mul(x = var_555_cast_fp16, y = var_556_to_fp16)[name = tensor("op_557_cast_fp16")]; + tensor var_559_equation_0 = const()[name = tensor("op_559_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_559_cast_fp16 = einsum(equation = var_559_equation_0, values = (var_361_cast_fp16, var_271_cast_fp16))[name = tensor("op_559_cast_fp16")]; + tensor var_560_to_fp16 = const()[name = tensor("op_560_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_561_cast_fp16 = mul(x = var_559_cast_fp16, y = var_560_to_fp16)[name = tensor("op_561_cast_fp16")]; + tensor var_563_equation_0 = const()[name = tensor("op_563_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_563_cast_fp16 = einsum(equation = var_563_equation_0, values = (var_373_cast_fp16, var_275_cast_fp16))[name = tensor("op_563_cast_fp16")]; + tensor var_564_to_fp16 = const()[name = tensor("op_564_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_565_cast_fp16 = mul(x = var_563_cast_fp16, y = var_564_to_fp16)[name = tensor("op_565_cast_fp16")]; + tensor var_567_equation_0 = const()[name = tensor("op_567_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_567_cast_fp16 = einsum(equation = var_567_equation_0, values = (var_373_cast_fp16, var_279_cast_fp16))[name = tensor("op_567_cast_fp16")]; + tensor var_568_to_fp16 = const()[name = tensor("op_568_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_569_cast_fp16 = mul(x = var_567_cast_fp16, y = var_568_to_fp16)[name = tensor("op_569_cast_fp16")]; + tensor var_571_equation_0 = const()[name = tensor("op_571_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_571_cast_fp16 = einsum(equation = var_571_equation_0, values = (var_373_cast_fp16, var_283_cast_fp16))[name = tensor("op_571_cast_fp16")]; + tensor var_572_to_fp16 = const()[name = tensor("op_572_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_573_cast_fp16 = mul(x = var_571_cast_fp16, y = var_572_to_fp16)[name = tensor("op_573_cast_fp16")]; + tensor aw_1_cast_fp16 = add(x = var_481_cast_fp16, y = mask)[name = tensor("aw_1_cast_fp16")]; + tensor aw_3_cast_fp16 = add(x = var_485_cast_fp16, y = mask)[name = tensor("aw_3_cast_fp16")]; + tensor aw_5_cast_fp16 = add(x = var_489_cast_fp16, y = mask)[name = tensor("aw_5_cast_fp16")]; + tensor aw_7_cast_fp16 = add(x = var_493_cast_fp16, y = mask)[name = tensor("aw_7_cast_fp16")]; + tensor aw_9_cast_fp16 = add(x = var_497_cast_fp16, y = mask)[name = tensor("aw_9_cast_fp16")]; + tensor aw_11_cast_fp16 = add(x = var_501_cast_fp16, y = mask)[name = tensor("aw_11_cast_fp16")]; + tensor aw_13_cast_fp16 = add(x = var_505_cast_fp16, y = mask)[name = tensor("aw_13_cast_fp16")]; + tensor aw_15_cast_fp16 = add(x = var_509_cast_fp16, y = mask)[name = tensor("aw_15_cast_fp16")]; + tensor aw_17_cast_fp16 = add(x = var_513_cast_fp16, y = mask)[name = tensor("aw_17_cast_fp16")]; + tensor aw_19_cast_fp16 = add(x = var_517_cast_fp16, y = mask)[name = tensor("aw_19_cast_fp16")]; + tensor aw_21_cast_fp16 = add(x = var_521_cast_fp16, y = mask)[name = tensor("aw_21_cast_fp16")]; + tensor aw_23_cast_fp16 = add(x = var_525_cast_fp16, y = mask)[name = tensor("aw_23_cast_fp16")]; + tensor aw_25_cast_fp16 = add(x = var_529_cast_fp16, y = mask)[name = tensor("aw_25_cast_fp16")]; + tensor aw_27_cast_fp16 = add(x = var_533_cast_fp16, y = mask)[name = tensor("aw_27_cast_fp16")]; + tensor aw_29_cast_fp16 = add(x = var_537_cast_fp16, y = mask)[name = tensor("aw_29_cast_fp16")]; + tensor aw_31_cast_fp16 = add(x = var_541_cast_fp16, y = mask)[name = tensor("aw_31_cast_fp16")]; + tensor aw_33_cast_fp16 = add(x = var_545_cast_fp16, y = mask)[name = tensor("aw_33_cast_fp16")]; + tensor aw_35_cast_fp16 = add(x = var_549_cast_fp16, y = mask)[name = tensor("aw_35_cast_fp16")]; + tensor aw_37_cast_fp16 = add(x = var_553_cast_fp16, y = mask)[name = tensor("aw_37_cast_fp16")]; + tensor aw_39_cast_fp16 = add(x = var_557_cast_fp16, y = mask)[name = tensor("aw_39_cast_fp16")]; + tensor aw_41_cast_fp16 = add(x = var_561_cast_fp16, y = mask)[name = tensor("aw_41_cast_fp16")]; + tensor aw_43_cast_fp16 = add(x = var_565_cast_fp16, y = mask)[name = tensor("aw_43_cast_fp16")]; + tensor aw_45_cast_fp16 = add(x = var_569_cast_fp16, y = mask)[name = tensor("aw_45_cast_fp16")]; + tensor aw_47_cast_fp16 = add(x = var_573_cast_fp16, y = mask)[name = tensor("aw_47_cast_fp16")]; + tensor var_598_cast_fp16 = softmax(axis = var_52, x = aw_1_cast_fp16)[name = tensor("op_598_cast_fp16")]; + tensor var_599_cast_fp16 = softmax(axis = var_52, x = aw_3_cast_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_600_cast_fp16 = softmax(axis = var_52, x = aw_5_cast_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = softmax(axis = var_52, x = aw_7_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_cast_fp16 = softmax(axis = var_52, x = aw_9_cast_fp16)[name = tensor("op_602_cast_fp16")]; + tensor var_603_cast_fp16 = softmax(axis = var_52, x = aw_11_cast_fp16)[name = tensor("op_603_cast_fp16")]; + tensor var_604_cast_fp16 = softmax(axis = var_52, x = aw_13_cast_fp16)[name = tensor("op_604_cast_fp16")]; + tensor var_605_cast_fp16 = softmax(axis = var_52, x = aw_15_cast_fp16)[name = tensor("op_605_cast_fp16")]; + tensor var_606_cast_fp16 = softmax(axis = var_52, x = aw_17_cast_fp16)[name = tensor("op_606_cast_fp16")]; + tensor var_607_cast_fp16 = softmax(axis = var_52, x = aw_19_cast_fp16)[name = tensor("op_607_cast_fp16")]; + tensor var_608_cast_fp16 = softmax(axis = var_52, x = aw_21_cast_fp16)[name = tensor("op_608_cast_fp16")]; + tensor var_609_cast_fp16 = softmax(axis = var_52, x = aw_23_cast_fp16)[name = tensor("op_609_cast_fp16")]; + tensor var_610_cast_fp16 = softmax(axis = var_52, x = aw_25_cast_fp16)[name = tensor("op_610_cast_fp16")]; + tensor var_611_cast_fp16 = softmax(axis = var_52, x = aw_27_cast_fp16)[name = tensor("op_611_cast_fp16")]; + tensor var_612_cast_fp16 = softmax(axis = var_52, x = aw_29_cast_fp16)[name = tensor("op_612_cast_fp16")]; + tensor var_613_cast_fp16 = softmax(axis = var_52, x = aw_31_cast_fp16)[name = tensor("op_613_cast_fp16")]; + tensor var_614_cast_fp16 = softmax(axis = var_52, x = aw_33_cast_fp16)[name = tensor("op_614_cast_fp16")]; + tensor var_615_cast_fp16 = softmax(axis = var_52, x = aw_35_cast_fp16)[name = tensor("op_615_cast_fp16")]; + tensor var_616_cast_fp16 = softmax(axis = var_52, x = aw_37_cast_fp16)[name = tensor("op_616_cast_fp16")]; + tensor var_617_cast_fp16 = softmax(axis = var_52, x = aw_39_cast_fp16)[name = tensor("op_617_cast_fp16")]; + tensor var_618_cast_fp16 = softmax(axis = var_52, x = aw_41_cast_fp16)[name = tensor("op_618_cast_fp16")]; + tensor var_619_cast_fp16 = softmax(axis = var_52, x = aw_43_cast_fp16)[name = tensor("op_619_cast_fp16")]; + tensor var_620_cast_fp16 = softmax(axis = var_52, x = aw_45_cast_fp16)[name = tensor("op_620_cast_fp16")]; + tensor var_621_cast_fp16 = softmax(axis = var_52, x = aw_47_cast_fp16)[name = tensor("op_621_cast_fp16")]; + tensor var_623_equation_0 = const()[name = tensor("op_623_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_623_cast_fp16 = einsum(equation = var_623_equation_0, values = (var_383_cast_fp16, var_598_cast_fp16))[name = tensor("op_623_cast_fp16")]; + tensor var_625_equation_0 = const()[name = tensor("op_625_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_625_cast_fp16 = einsum(equation = var_625_equation_0, values = (var_383_cast_fp16, var_599_cast_fp16))[name = tensor("op_625_cast_fp16")]; + tensor var_627_equation_0 = const()[name = tensor("op_627_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_627_cast_fp16 = einsum(equation = var_627_equation_0, values = (var_383_cast_fp16, var_600_cast_fp16))[name = tensor("op_627_cast_fp16")]; + tensor var_629_equation_0 = const()[name = tensor("op_629_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_629_cast_fp16 = einsum(equation = var_629_equation_0, values = (var_395_cast_fp16, var_601_cast_fp16))[name = tensor("op_629_cast_fp16")]; + tensor var_631_equation_0 = const()[name = tensor("op_631_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_631_cast_fp16 = einsum(equation = var_631_equation_0, values = (var_395_cast_fp16, var_602_cast_fp16))[name = tensor("op_631_cast_fp16")]; + tensor var_633_equation_0 = const()[name = tensor("op_633_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_633_cast_fp16 = einsum(equation = var_633_equation_0, values = (var_395_cast_fp16, var_603_cast_fp16))[name = tensor("op_633_cast_fp16")]; + tensor var_635_equation_0 = const()[name = tensor("op_635_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_635_cast_fp16 = einsum(equation = var_635_equation_0, values = (var_407_cast_fp16, var_604_cast_fp16))[name = tensor("op_635_cast_fp16")]; + tensor var_637_equation_0 = const()[name = tensor("op_637_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_637_cast_fp16 = einsum(equation = var_637_equation_0, values = (var_407_cast_fp16, var_605_cast_fp16))[name = tensor("op_637_cast_fp16")]; + tensor var_639_equation_0 = const()[name = tensor("op_639_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_639_cast_fp16 = einsum(equation = var_639_equation_0, values = (var_407_cast_fp16, var_606_cast_fp16))[name = tensor("op_639_cast_fp16")]; + tensor var_641_equation_0 = const()[name = tensor("op_641_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_641_cast_fp16 = einsum(equation = var_641_equation_0, values = (var_419_cast_fp16, var_607_cast_fp16))[name = tensor("op_641_cast_fp16")]; + tensor var_643_equation_0 = const()[name = tensor("op_643_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_643_cast_fp16 = einsum(equation = var_643_equation_0, values = (var_419_cast_fp16, var_608_cast_fp16))[name = tensor("op_643_cast_fp16")]; + tensor var_645_equation_0 = const()[name = tensor("op_645_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_645_cast_fp16 = einsum(equation = var_645_equation_0, values = (var_419_cast_fp16, var_609_cast_fp16))[name = tensor("op_645_cast_fp16")]; + tensor var_647_equation_0 = const()[name = tensor("op_647_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_647_cast_fp16 = einsum(equation = var_647_equation_0, values = (var_431_cast_fp16, var_610_cast_fp16))[name = tensor("op_647_cast_fp16")]; + tensor var_649_equation_0 = const()[name = tensor("op_649_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_649_cast_fp16 = einsum(equation = var_649_equation_0, values = (var_431_cast_fp16, var_611_cast_fp16))[name = tensor("op_649_cast_fp16")]; + tensor var_651_equation_0 = const()[name = tensor("op_651_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_651_cast_fp16 = einsum(equation = var_651_equation_0, values = (var_431_cast_fp16, var_612_cast_fp16))[name = tensor("op_651_cast_fp16")]; + tensor var_653_equation_0 = const()[name = tensor("op_653_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_653_cast_fp16 = einsum(equation = var_653_equation_0, values = (var_443_cast_fp16, var_613_cast_fp16))[name = tensor("op_653_cast_fp16")]; + tensor var_655_equation_0 = const()[name = tensor("op_655_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_655_cast_fp16 = einsum(equation = var_655_equation_0, values = (var_443_cast_fp16, var_614_cast_fp16))[name = tensor("op_655_cast_fp16")]; + tensor var_657_equation_0 = const()[name = tensor("op_657_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_657_cast_fp16 = einsum(equation = var_657_equation_0, values = (var_443_cast_fp16, var_615_cast_fp16))[name = tensor("op_657_cast_fp16")]; + tensor var_659_equation_0 = const()[name = tensor("op_659_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_659_cast_fp16 = einsum(equation = var_659_equation_0, values = (var_455_cast_fp16, var_616_cast_fp16))[name = tensor("op_659_cast_fp16")]; + tensor var_661_equation_0 = const()[name = tensor("op_661_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_661_cast_fp16 = einsum(equation = var_661_equation_0, values = (var_455_cast_fp16, var_617_cast_fp16))[name = tensor("op_661_cast_fp16")]; + tensor var_663_equation_0 = const()[name = tensor("op_663_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_663_cast_fp16 = einsum(equation = var_663_equation_0, values = (var_455_cast_fp16, var_618_cast_fp16))[name = tensor("op_663_cast_fp16")]; + tensor var_665_equation_0 = const()[name = tensor("op_665_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_665_cast_fp16 = einsum(equation = var_665_equation_0, values = (var_467_cast_fp16, var_619_cast_fp16))[name = tensor("op_665_cast_fp16")]; + tensor var_667_equation_0 = const()[name = tensor("op_667_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_667_cast_fp16 = einsum(equation = var_667_equation_0, values = (var_467_cast_fp16, var_620_cast_fp16))[name = tensor("op_667_cast_fp16")]; + tensor var_669_equation_0 = const()[name = tensor("op_669_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_669_cast_fp16 = einsum(equation = var_669_equation_0, values = (var_467_cast_fp16, var_621_cast_fp16))[name = tensor("op_669_cast_fp16")]; + tensor x_11_interleave_0 = const()[name = tensor("x_11_interleave_0"), val = tensor(false)]; + tensor x_11_cast_fp16 = concat(axis = var_52, interleave = x_11_interleave_0, values = (var_623_cast_fp16, var_625_cast_fp16, var_627_cast_fp16, var_629_cast_fp16, var_631_cast_fp16, var_633_cast_fp16, var_635_cast_fp16, var_637_cast_fp16, var_639_cast_fp16, var_641_cast_fp16, var_643_cast_fp16, var_645_cast_fp16, var_647_cast_fp16, var_649_cast_fp16, var_651_cast_fp16, var_653_cast_fp16, var_655_cast_fp16, var_657_cast_fp16, var_659_cast_fp16, var_661_cast_fp16, var_663_cast_fp16, var_665_cast_fp16, var_667_cast_fp16, var_669_cast_fp16))[name = tensor("x_11_cast_fp16")]; + tensor var_674 = const()[name = tensor("op_674"), val = tensor([1, 3072, -1, 8])]; + tensor input_3_cast_fp16 = reshape(shape = var_674, x = x_11_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor var_677 = const()[name = tensor("op_677"), val = tensor([1, 1])]; + tensor var_679 = const()[name = tensor("op_679"), val = tensor([1, 1])]; + tensor attention_output_1_pad_type_0 = const()[name = tensor("attention_output_1_pad_type_0"), val = tensor("custom")]; + tensor attention_output_1_pad_0 = const()[name = tensor("attention_output_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_0_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(31463936)))]; + tensor attention_output_1_cast_fp16 = conv(dilations = var_679, groups = var_52, pad = attention_output_1_pad_0, pad_type = attention_output_1_pad_type_0, strides = var_677, weight = blocks_0_attn_proj_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("attention_output_1_cast_fp16")]; + tensor x_13_cast_fp16 = add(x = attention_output_1_cast_fp16, y = x)[name = tensor("x_13_cast_fp16")]; + tensor x_eps_3_interleave_0 = const()[name = tensor("x_eps_3_interleave_0"), val = tensor(false)]; + tensor eps_chan_3_to_fp16 = const()[name = tensor("eps_chan_3_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338368)))]; + tensor x_eps_3_cast_fp16 = concat(axis = var_52, interleave = x_eps_3_interleave_0, values = (x_13_cast_fp16, eps_chan_3_to_fp16))[name = tensor("x_eps_3_cast_fp16")]; + tensor norm_x_3_axes_0 = const()[name = tensor("norm_x_3_axes_0"), val = tensor([1])]; + tensor norm_x_3_cast_fp16 = reduce_l2_norm(axes = norm_x_3_axes_0, keep_dims = var_55, x = x_eps_3_cast_fp16)[name = tensor("norm_x_3_cast_fp16")]; + tensor x_normed_7_cast_fp16 = real_div(x = x_13_cast_fp16, y = norm_x_3_cast_fp16)[name = tensor("x_normed_7_cast_fp16")]; + tensor var_705_to_fp16 = const()[name = tensor("op_705_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_9_cast_fp16 = mul(x = x_normed_7_cast_fp16, y = var_705_to_fp16)[name = tensor("x_normed_9_cast_fp16")]; + tensor blocks_0_norm_2_weight_to_fp16 = const()[name = tensor("blocks_0_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50338560)))]; + tensor input_5_cast_fp16 = mul(x = x_normed_9_cast_fp16, y = blocks_0_norm_2_weight_to_fp16)[name = tensor("input_5_cast_fp16")]; + tensor var_716 = const()[name = tensor("op_716"), val = tensor([1, 1])]; + tensor var_718 = const()[name = tensor("op_718"), val = tensor([1, 1])]; + tensor input_7_pad_type_0 = const()[name = tensor("input_7_pad_type_0"), val = tensor("custom")]; + tensor input_7_pad_0 = const()[name = tensor("input_7_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(50344768)))]; + tensor input_7_cast_fp16 = conv(dilations = var_718, groups = var_52, pad = input_7_pad_0, pad_type = input_7_pad_type_0, strides = var_716, weight = blocks_0_mlp_fc_1_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("input_7_cast_fp16")]; + tensor var_722 = const()[name = tensor("op_722"), val = tensor([1, 1])]; + tensor var_724 = const()[name = tensor("op_724"), val = tensor([1, 1])]; + tensor x_fc_2_1_pad_type_0 = const()[name = tensor("x_fc_2_1_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_1_pad_0 = const()[name = tensor("x_fc_2_1_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(100676480)))]; + tensor x_fc_2_1_cast_fp16 = conv(dilations = var_724, groups = var_52, pad = x_fc_2_1_pad_0, pad_type = x_fc_2_1_pad_type_0, strides = var_722, weight = blocks_0_mlp_fc_2_weight_to_fp16, x = input_5_cast_fp16)[name = tensor("x_fc_2_1_cast_fp16")]; + tensor var_727_cast_fp16 = silu(x = input_7_cast_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_9_cast_fp16 = mul(x = var_727_cast_fp16, y = x_fc_2_1_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor var_730 = const()[name = tensor("op_730"), val = tensor([1, 1])]; + tensor var_732 = const()[name = tensor("op_732"), val = tensor([1, 1])]; + tensor var_734_pad_type_0 = const()[name = tensor("op_734_pad_type_0"), val = tensor("custom")]; + tensor var_734_pad_0 = const()[name = tensor("op_734_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_0_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_0_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(151008192)))]; + tensor var_734_cast_fp16 = conv(dilations = var_732, groups = var_52, pad = var_734_pad_0, pad_type = var_734_pad_type_0, strides = var_730, weight = blocks_0_mlp_proj_weight_to_fp16, x = input_9_cast_fp16)[name = tensor("op_734_cast_fp16")]; + tensor x_17_cast_fp16 = add(x = var_734_cast_fp16, y = x_13_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor var_740 = const()[name = tensor("op_740"), val = tensor(-1)]; + tensor var_744 = const()[name = tensor("op_744"), val = tensor(-2)]; + tensor var_746 = const()[name = tensor("op_746"), val = tensor(-3)]; + tensor var_779 = const()[name = tensor("op_779"), val = tensor(1)]; + tensor var_782 = const()[name = tensor("op_782"), val = tensor(true)]; + tensor x_eps_5_interleave_0 = const()[name = tensor("x_eps_5_interleave_0"), val = tensor(false)]; + tensor eps_chan_5_to_fp16 = const()[name = tensor("eps_chan_5_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201339904)))]; + tensor x_eps_5_cast_fp16 = concat(axis = var_779, interleave = x_eps_5_interleave_0, values = (x_17_cast_fp16, eps_chan_5_to_fp16))[name = tensor("x_eps_5_cast_fp16")]; + tensor norm_x_5_axes_0 = const()[name = tensor("norm_x_5_axes_0"), val = tensor([1])]; + tensor norm_x_5_cast_fp16 = reduce_l2_norm(axes = norm_x_5_axes_0, keep_dims = var_782, x = x_eps_5_cast_fp16)[name = tensor("norm_x_5_cast_fp16")]; + tensor x_normed_13_cast_fp16 = real_div(x = x_17_cast_fp16, y = norm_x_5_cast_fp16)[name = tensor("x_normed_13_cast_fp16")]; + tensor var_805_to_fp16 = const()[name = tensor("op_805_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_15_cast_fp16 = mul(x = x_normed_13_cast_fp16, y = var_805_to_fp16)[name = tensor("x_normed_15_cast_fp16")]; + tensor blocks_1_norm_1_weight_to_fp16 = const()[name = tensor("blocks_1_norm_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201340096)))]; + tensor x_21_cast_fp16 = mul(x = x_normed_15_cast_fp16, y = blocks_1_norm_1_weight_to_fp16)[name = tensor("x_21_cast_fp16")]; + tensor var_829 = const()[name = tensor("op_829"), val = tensor([1, 3072, 1, -1])]; + tensor input_11_cast_fp16 = reshape(shape = var_829, x = x_21_cast_fp16)[name = tensor("input_11_cast_fp16")]; + tensor var_832 = const()[name = tensor("op_832"), val = tensor([1, 1])]; + tensor var_834 = const()[name = tensor("op_834"), val = tensor([1, 1])]; + tensor q_9_pad_type_0 = const()[name = tensor("q_9_pad_type_0"), val = tensor("custom")]; + tensor q_9_pad_0 = const()[name = tensor("q_9_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_q_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_q_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(201346304)))]; + tensor q_9_cast_fp16 = conv(dilations = var_834, groups = var_779, pad = q_9_pad_0, pad_type = q_9_pad_type_0, strides = var_832, weight = blocks_1_attn_q_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("q_9_cast_fp16")]; + tensor var_838 = const()[name = tensor("op_838"), val = tensor([1, 1])]; + tensor var_840 = const()[name = tensor("op_840"), val = tensor([1, 1])]; + tensor k_13_pad_type_0 = const()[name = tensor("k_13_pad_type_0"), val = tensor("custom")]; + tensor k_13_pad_0 = const()[name = tensor("k_13_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_k_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_k_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(220220736)))]; + tensor k_13_cast_fp16 = conv(dilations = var_840, groups = var_779, pad = k_13_pad_0, pad_type = k_13_pad_type_0, strides = var_838, weight = blocks_1_attn_k_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("k_13_cast_fp16")]; + tensor var_844 = const()[name = tensor("op_844"), val = tensor([1, 1])]; + tensor var_846 = const()[name = tensor("op_846"), val = tensor([1, 1])]; + tensor v_11_pad_type_0 = const()[name = tensor("v_11_pad_type_0"), val = tensor("custom")]; + tensor v_11_pad_0 = const()[name = tensor("v_11_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_v_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_v_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(226512256)))]; + tensor v_11_cast_fp16 = conv(dilations = var_846, groups = var_779, pad = v_11_pad_0, pad_type = v_11_pad_type_0, strides = var_844, weight = blocks_1_attn_v_proj_weight_to_fp16, x = input_11_cast_fp16)[name = tensor("v_11_cast_fp16")]; + tensor var_849 = const()[name = tensor("op_849"), val = tensor([1, 24, 128, 64])]; + tensor q_11_cast_fp16 = reshape(shape = var_849, x = q_9_cast_fp16)[name = tensor("q_11_cast_fp16")]; + tensor var_851 = const()[name = tensor("op_851"), val = tensor([1, -1, 128, 64])]; + tensor k_15_cast_fp16 = reshape(shape = var_851, x = k_13_cast_fp16)[name = tensor("k_15_cast_fp16")]; + tensor var_865_begin_0 = const()[name = tensor("op_865_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_865_end_0 = const()[name = tensor("op_865_end_0"), val = tensor([1, 24, 64, 64])]; + tensor var_865_end_mask_0 = const()[name = tensor("op_865_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_865_cast_fp16 = slice_by_index(begin = var_865_begin_0, end = var_865_end_0, end_mask = var_865_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_865_cast_fp16")]; + tensor var_871_begin_0 = const()[name = tensor("op_871_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_871_end_0 = const()[name = tensor("op_871_end_0"), val = tensor([1, 24, 128, 64])]; + tensor var_871_end_mask_0 = const()[name = tensor("op_871_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_871_cast_fp16 = slice_by_index(begin = var_871_begin_0, end = var_871_end_0, end_mask = var_871_end_mask_0, x = q_11_cast_fp16)[name = tensor("op_871_cast_fp16")]; + tensor const_30_promoted_to_fp16 = const()[name = tensor("const_30_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_873_cast_fp16 = mul(x = var_871_cast_fp16, y = const_30_promoted_to_fp16)[name = tensor("op_873_cast_fp16")]; + tensor rotated_5_interleave_0 = const()[name = tensor("rotated_5_interleave_0"), val = tensor(false)]; + tensor rotated_5_cast_fp16 = concat(axis = var_744, interleave = rotated_5_interleave_0, values = (var_873_cast_fp16, var_865_cast_fp16))[name = tensor("rotated_5_cast_fp16")]; + tensor var_876_cast_fp16 = mul(x = q_11_cast_fp16, y = cos)[name = tensor("op_876_cast_fp16")]; + tensor var_877_cast_fp16 = mul(x = rotated_5_cast_fp16, y = sin)[name = tensor("op_877_cast_fp16")]; + tensor roped_5_cast_fp16 = add(x = var_876_cast_fp16, y = var_877_cast_fp16)[name = tensor("roped_5_cast_fp16")]; + tensor var_890_begin_0 = const()[name = tensor("op_890_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_890_end_0 = const()[name = tensor("op_890_end_0"), val = tensor([1, 8, 64, 64])]; + tensor var_890_end_mask_0 = const()[name = tensor("op_890_end_mask_0"), val = tensor([true, true, false, true])]; + tensor var_890_cast_fp16 = slice_by_index(begin = var_890_begin_0, end = var_890_end_0, end_mask = var_890_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_890_cast_fp16")]; + tensor var_896_begin_0 = const()[name = tensor("op_896_begin_0"), val = tensor([0, 0, 64, 0])]; + tensor var_896_end_0 = const()[name = tensor("op_896_end_0"), val = tensor([1, 8, 128, 64])]; + tensor var_896_end_mask_0 = const()[name = tensor("op_896_end_mask_0"), val = tensor([true, true, true, true])]; + tensor var_896_cast_fp16 = slice_by_index(begin = var_896_begin_0, end = var_896_end_0, end_mask = var_896_end_mask_0, x = k_15_cast_fp16)[name = tensor("op_896_cast_fp16")]; + tensor const_32_promoted_to_fp16 = const()[name = tensor("const_32_promoted_to_fp16"), val = tensor(-0x1p+0)]; + tensor var_898_cast_fp16 = mul(x = var_896_cast_fp16, y = const_32_promoted_to_fp16)[name = tensor("op_898_cast_fp16")]; + tensor rotated_interleave_0 = const()[name = tensor("rotated_interleave_0"), val = tensor(false)]; + tensor rotated_cast_fp16 = concat(axis = var_744, interleave = rotated_interleave_0, values = (var_898_cast_fp16, var_890_cast_fp16))[name = tensor("rotated_cast_fp16")]; + tensor var_901_cast_fp16 = mul(x = k_15_cast_fp16, y = cos)[name = tensor("op_901_cast_fp16")]; + tensor var_902_cast_fp16 = mul(x = rotated_cast_fp16, y = sin)[name = tensor("op_902_cast_fp16")]; + tensor roped_cast_fp16 = add(x = var_901_cast_fp16, y = var_902_cast_fp16)[name = tensor("roped_cast_fp16")]; + tensor var_905 = const()[name = tensor("op_905"), val = tensor([1, -1, 1, 64])]; + tensor k_19_cast_fp16 = reshape(shape = var_905, x = roped_cast_fp16)[name = tensor("k_19_cast_fp16")]; + tensor var_907 = const()[name = tensor("op_907"), val = tensor([1, -1, 1, 64])]; + tensor new_v_cache_1 = reshape(shape = var_907, x = v_11_cast_fp16)[name = tensor("new_v_cache_1_type_fp32_cast_fp16")]; + tensor k_21_perm_0 = const()[name = tensor("k_21_perm_0"), val = tensor([0, -1, 2, -3])]; + tensor k_interleave_0 = const()[name = tensor("k_interleave_0"), val = tensor(false)]; + tensor new_k_cache_1 = transpose(perm = k_21_perm_0, x = k_19_cast_fp16)[name = tensor("transpose_0")]; + tensor k_cast_fp16 = concat(axis = var_746, interleave = k_interleave_0, values = (k_cache_1, new_k_cache_1))[name = tensor("k_cast_fp16")]; + tensor v_17_interleave_0 = const()[name = tensor("v_17_interleave_0"), val = tensor(false)]; + tensor v_17_cast_fp16 = concat(axis = var_740, interleave = v_17_interleave_0, values = (v_cache_1, new_v_cache_1))[name = tensor("v_17_cast_fp16")]; + tensor var_915 = const()[name = tensor("op_915"), val = tensor([1, 3072, 1, -1])]; + tensor q_cast_fp16 = reshape(shape = var_915, x = roped_5_cast_fp16)[name = tensor("q_cast_fp16")]; + tensor var_920_begin_0 = const()[name = tensor("op_920_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_920_end_0 = const()[name = tensor("op_920_end_0"), val = tensor([1, 128, 1, 64])]; + tensor var_920_end_mask_0 = const()[name = tensor("op_920_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_920_cast_fp16 = slice_by_index(begin = var_920_begin_0, end = var_920_end_0, end_mask = var_920_end_mask_0, x = q_cast_fp16)[name = tensor("op_920_cast_fp16")]; + tensor var_924_begin_0 = const()[name = tensor("op_924_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_924_end_0 = const()[name = tensor("op_924_end_0"), val = tensor([1, 256, 1, 64])]; + tensor var_924_end_mask_0 = const()[name = tensor("op_924_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_924_cast_fp16 = slice_by_index(begin = var_924_begin_0, end = var_924_end_0, end_mask = var_924_end_mask_0, x = q_cast_fp16)[name = tensor("op_924_cast_fp16")]; + tensor var_928_begin_0 = const()[name = tensor("op_928_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_928_end_0 = const()[name = tensor("op_928_end_0"), val = tensor([1, 384, 1, 64])]; + tensor var_928_end_mask_0 = const()[name = tensor("op_928_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_928_cast_fp16 = slice_by_index(begin = var_928_begin_0, end = var_928_end_0, end_mask = var_928_end_mask_0, x = q_cast_fp16)[name = tensor("op_928_cast_fp16")]; + tensor var_932_begin_0 = const()[name = tensor("op_932_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_932_end_0 = const()[name = tensor("op_932_end_0"), val = tensor([1, 512, 1, 64])]; + tensor var_932_end_mask_0 = const()[name = tensor("op_932_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_932_cast_fp16 = slice_by_index(begin = var_932_begin_0, end = var_932_end_0, end_mask = var_932_end_mask_0, x = q_cast_fp16)[name = tensor("op_932_cast_fp16")]; + tensor var_936_begin_0 = const()[name = tensor("op_936_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_936_end_0 = const()[name = tensor("op_936_end_0"), val = tensor([1, 640, 1, 64])]; + tensor var_936_end_mask_0 = const()[name = tensor("op_936_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_936_cast_fp16 = slice_by_index(begin = var_936_begin_0, end = var_936_end_0, end_mask = var_936_end_mask_0, x = q_cast_fp16)[name = tensor("op_936_cast_fp16")]; + tensor var_940_begin_0 = const()[name = tensor("op_940_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_940_end_0 = const()[name = tensor("op_940_end_0"), val = tensor([1, 768, 1, 64])]; + tensor var_940_end_mask_0 = const()[name = tensor("op_940_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_940_cast_fp16 = slice_by_index(begin = var_940_begin_0, end = var_940_end_0, end_mask = var_940_end_mask_0, x = q_cast_fp16)[name = tensor("op_940_cast_fp16")]; + tensor var_944_begin_0 = const()[name = tensor("op_944_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_944_end_0 = const()[name = tensor("op_944_end_0"), val = tensor([1, 896, 1, 64])]; + tensor var_944_end_mask_0 = const()[name = tensor("op_944_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_944_cast_fp16 = slice_by_index(begin = var_944_begin_0, end = var_944_end_0, end_mask = var_944_end_mask_0, x = q_cast_fp16)[name = tensor("op_944_cast_fp16")]; + tensor var_948_begin_0 = const()[name = tensor("op_948_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_948_end_0 = const()[name = tensor("op_948_end_0"), val = tensor([1, 1024, 1, 64])]; + tensor var_948_end_mask_0 = const()[name = tensor("op_948_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_948_cast_fp16 = slice_by_index(begin = var_948_begin_0, end = var_948_end_0, end_mask = var_948_end_mask_0, x = q_cast_fp16)[name = tensor("op_948_cast_fp16")]; + tensor var_952_begin_0 = const()[name = tensor("op_952_begin_0"), val = tensor([0, 1024, 0, 0])]; + tensor var_952_end_0 = const()[name = tensor("op_952_end_0"), val = tensor([1, 1152, 1, 64])]; + tensor var_952_end_mask_0 = const()[name = tensor("op_952_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_952_cast_fp16 = slice_by_index(begin = var_952_begin_0, end = var_952_end_0, end_mask = var_952_end_mask_0, x = q_cast_fp16)[name = tensor("op_952_cast_fp16")]; + tensor var_956_begin_0 = const()[name = tensor("op_956_begin_0"), val = tensor([0, 1152, 0, 0])]; + tensor var_956_end_0 = const()[name = tensor("op_956_end_0"), val = tensor([1, 1280, 1, 64])]; + tensor var_956_end_mask_0 = const()[name = tensor("op_956_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_956_cast_fp16 = slice_by_index(begin = var_956_begin_0, end = var_956_end_0, end_mask = var_956_end_mask_0, x = q_cast_fp16)[name = tensor("op_956_cast_fp16")]; + tensor var_960_begin_0 = const()[name = tensor("op_960_begin_0"), val = tensor([0, 1280, 0, 0])]; + tensor var_960_end_0 = const()[name = tensor("op_960_end_0"), val = tensor([1, 1408, 1, 64])]; + tensor var_960_end_mask_0 = const()[name = tensor("op_960_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_960_cast_fp16 = slice_by_index(begin = var_960_begin_0, end = var_960_end_0, end_mask = var_960_end_mask_0, x = q_cast_fp16)[name = tensor("op_960_cast_fp16")]; + tensor var_964_begin_0 = const()[name = tensor("op_964_begin_0"), val = tensor([0, 1408, 0, 0])]; + tensor var_964_end_0 = const()[name = tensor("op_964_end_0"), val = tensor([1, 1536, 1, 64])]; + tensor var_964_end_mask_0 = const()[name = tensor("op_964_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_964_cast_fp16 = slice_by_index(begin = var_964_begin_0, end = var_964_end_0, end_mask = var_964_end_mask_0, x = q_cast_fp16)[name = tensor("op_964_cast_fp16")]; + tensor var_968_begin_0 = const()[name = tensor("op_968_begin_0"), val = tensor([0, 1536, 0, 0])]; + tensor var_968_end_0 = const()[name = tensor("op_968_end_0"), val = tensor([1, 1664, 1, 64])]; + tensor var_968_end_mask_0 = const()[name = tensor("op_968_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_968_cast_fp16 = slice_by_index(begin = var_968_begin_0, end = var_968_end_0, end_mask = var_968_end_mask_0, x = q_cast_fp16)[name = tensor("op_968_cast_fp16")]; + tensor var_972_begin_0 = const()[name = tensor("op_972_begin_0"), val = tensor([0, 1664, 0, 0])]; + tensor var_972_end_0 = const()[name = tensor("op_972_end_0"), val = tensor([1, 1792, 1, 64])]; + tensor var_972_end_mask_0 = const()[name = tensor("op_972_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_972_cast_fp16 = slice_by_index(begin = var_972_begin_0, end = var_972_end_0, end_mask = var_972_end_mask_0, x = q_cast_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_976_begin_0 = const()[name = tensor("op_976_begin_0"), val = tensor([0, 1792, 0, 0])]; + tensor var_976_end_0 = const()[name = tensor("op_976_end_0"), val = tensor([1, 1920, 1, 64])]; + tensor var_976_end_mask_0 = const()[name = tensor("op_976_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_976_cast_fp16 = slice_by_index(begin = var_976_begin_0, end = var_976_end_0, end_mask = var_976_end_mask_0, x = q_cast_fp16)[name = tensor("op_976_cast_fp16")]; + tensor var_980_begin_0 = const()[name = tensor("op_980_begin_0"), val = tensor([0, 1920, 0, 0])]; + tensor var_980_end_0 = const()[name = tensor("op_980_end_0"), val = tensor([1, 2048, 1, 64])]; + tensor var_980_end_mask_0 = const()[name = tensor("op_980_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_980_cast_fp16 = slice_by_index(begin = var_980_begin_0, end = var_980_end_0, end_mask = var_980_end_mask_0, x = q_cast_fp16)[name = tensor("op_980_cast_fp16")]; + tensor var_984_begin_0 = const()[name = tensor("op_984_begin_0"), val = tensor([0, 2048, 0, 0])]; + tensor var_984_end_0 = const()[name = tensor("op_984_end_0"), val = tensor([1, 2176, 1, 64])]; + tensor var_984_end_mask_0 = const()[name = tensor("op_984_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_984_cast_fp16 = slice_by_index(begin = var_984_begin_0, end = var_984_end_0, end_mask = var_984_end_mask_0, x = q_cast_fp16)[name = tensor("op_984_cast_fp16")]; + tensor var_988_begin_0 = const()[name = tensor("op_988_begin_0"), val = tensor([0, 2176, 0, 0])]; + tensor var_988_end_0 = const()[name = tensor("op_988_end_0"), val = tensor([1, 2304, 1, 64])]; + tensor var_988_end_mask_0 = const()[name = tensor("op_988_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_988_cast_fp16 = slice_by_index(begin = var_988_begin_0, end = var_988_end_0, end_mask = var_988_end_mask_0, x = q_cast_fp16)[name = tensor("op_988_cast_fp16")]; + tensor var_992_begin_0 = const()[name = tensor("op_992_begin_0"), val = tensor([0, 2304, 0, 0])]; + tensor var_992_end_0 = const()[name = tensor("op_992_end_0"), val = tensor([1, 2432, 1, 64])]; + tensor var_992_end_mask_0 = const()[name = tensor("op_992_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_992_cast_fp16 = slice_by_index(begin = var_992_begin_0, end = var_992_end_0, end_mask = var_992_end_mask_0, x = q_cast_fp16)[name = tensor("op_992_cast_fp16")]; + tensor var_996_begin_0 = const()[name = tensor("op_996_begin_0"), val = tensor([0, 2432, 0, 0])]; + tensor var_996_end_0 = const()[name = tensor("op_996_end_0"), val = tensor([1, 2560, 1, 64])]; + tensor var_996_end_mask_0 = const()[name = tensor("op_996_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_996_cast_fp16 = slice_by_index(begin = var_996_begin_0, end = var_996_end_0, end_mask = var_996_end_mask_0, x = q_cast_fp16)[name = tensor("op_996_cast_fp16")]; + tensor var_1000_begin_0 = const()[name = tensor("op_1000_begin_0"), val = tensor([0, 2560, 0, 0])]; + tensor var_1000_end_0 = const()[name = tensor("op_1000_end_0"), val = tensor([1, 2688, 1, 64])]; + tensor var_1000_end_mask_0 = const()[name = tensor("op_1000_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1000_cast_fp16 = slice_by_index(begin = var_1000_begin_0, end = var_1000_end_0, end_mask = var_1000_end_mask_0, x = q_cast_fp16)[name = tensor("op_1000_cast_fp16")]; + tensor var_1004_begin_0 = const()[name = tensor("op_1004_begin_0"), val = tensor([0, 2688, 0, 0])]; + tensor var_1004_end_0 = const()[name = tensor("op_1004_end_0"), val = tensor([1, 2816, 1, 64])]; + tensor var_1004_end_mask_0 = const()[name = tensor("op_1004_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1004_cast_fp16 = slice_by_index(begin = var_1004_begin_0, end = var_1004_end_0, end_mask = var_1004_end_mask_0, x = q_cast_fp16)[name = tensor("op_1004_cast_fp16")]; + tensor var_1008_begin_0 = const()[name = tensor("op_1008_begin_0"), val = tensor([0, 2816, 0, 0])]; + tensor var_1008_end_0 = const()[name = tensor("op_1008_end_0"), val = tensor([1, 2944, 1, 64])]; + tensor var_1008_end_mask_0 = const()[name = tensor("op_1008_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1008_cast_fp16 = slice_by_index(begin = var_1008_begin_0, end = var_1008_end_0, end_mask = var_1008_end_mask_0, x = q_cast_fp16)[name = tensor("op_1008_cast_fp16")]; + tensor var_1012_begin_0 = const()[name = tensor("op_1012_begin_0"), val = tensor([0, 2944, 0, 0])]; + tensor var_1012_end_0 = const()[name = tensor("op_1012_end_0"), val = tensor([1, 3072, 1, 64])]; + tensor var_1012_end_mask_0 = const()[name = tensor("op_1012_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1012_cast_fp16 = slice_by_index(begin = var_1012_begin_0, end = var_1012_end_0, end_mask = var_1012_end_mask_0, x = q_cast_fp16)[name = tensor("op_1012_cast_fp16")]; + tensor var_1018_begin_0 = const()[name = tensor("op_1018_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1018_end_0 = const()[name = tensor("op_1018_end_0"), val = tensor([1, 512, 1, 128])]; + tensor var_1018_end_mask_0 = const()[name = tensor("op_1018_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1018_cast_fp16 = slice_by_index(begin = var_1018_begin_0, end = var_1018_end_0, end_mask = var_1018_end_mask_0, x = k_cast_fp16)[name = tensor("op_1018_cast_fp16")]; + tensor var_1030_begin_0 = const()[name = tensor("op_1030_begin_0"), val = tensor([0, 0, 0, 128])]; + tensor var_1030_end_0 = const()[name = tensor("op_1030_end_0"), val = tensor([1, 512, 1, 256])]; + tensor var_1030_end_mask_0 = const()[name = tensor("op_1030_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1030_cast_fp16 = slice_by_index(begin = var_1030_begin_0, end = var_1030_end_0, end_mask = var_1030_end_mask_0, x = k_cast_fp16)[name = tensor("op_1030_cast_fp16")]; + tensor var_1042_begin_0 = const()[name = tensor("op_1042_begin_0"), val = tensor([0, 0, 0, 256])]; + tensor var_1042_end_0 = const()[name = tensor("op_1042_end_0"), val = tensor([1, 512, 1, 384])]; + tensor var_1042_end_mask_0 = const()[name = tensor("op_1042_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1042_cast_fp16 = slice_by_index(begin = var_1042_begin_0, end = var_1042_end_0, end_mask = var_1042_end_mask_0, x = k_cast_fp16)[name = tensor("op_1042_cast_fp16")]; + tensor var_1054_begin_0 = const()[name = tensor("op_1054_begin_0"), val = tensor([0, 0, 0, 384])]; + tensor var_1054_end_0 = const()[name = tensor("op_1054_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1054_end_mask_0 = const()[name = tensor("op_1054_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1054_cast_fp16 = slice_by_index(begin = var_1054_begin_0, end = var_1054_end_0, end_mask = var_1054_end_mask_0, x = k_cast_fp16)[name = tensor("op_1054_cast_fp16")]; + tensor var_1066_begin_0 = const()[name = tensor("op_1066_begin_0"), val = tensor([0, 0, 0, 512])]; + tensor var_1066_end_0 = const()[name = tensor("op_1066_end_0"), val = tensor([1, 512, 1, 640])]; + tensor var_1066_end_mask_0 = const()[name = tensor("op_1066_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1066_cast_fp16 = slice_by_index(begin = var_1066_begin_0, end = var_1066_end_0, end_mask = var_1066_end_mask_0, x = k_cast_fp16)[name = tensor("op_1066_cast_fp16")]; + tensor var_1078_begin_0 = const()[name = tensor("op_1078_begin_0"), val = tensor([0, 0, 0, 640])]; + tensor var_1078_end_0 = const()[name = tensor("op_1078_end_0"), val = tensor([1, 512, 1, 768])]; + tensor var_1078_end_mask_0 = const()[name = tensor("op_1078_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1078_cast_fp16 = slice_by_index(begin = var_1078_begin_0, end = var_1078_end_0, end_mask = var_1078_end_mask_0, x = k_cast_fp16)[name = tensor("op_1078_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0, 768])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 512, 1, 896])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = k_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1102_begin_0 = const()[name = tensor("op_1102_begin_0"), val = tensor([0, 0, 0, 896])]; + tensor var_1102_end_0 = const()[name = tensor("op_1102_end_0"), val = tensor([1, 512, 1, 1024])]; + tensor var_1102_end_mask_0 = const()[name = tensor("op_1102_end_mask_0"), val = tensor([true, true, true, false])]; + tensor var_1102_cast_fp16 = slice_by_index(begin = var_1102_begin_0, end = var_1102_end_0, end_mask = var_1102_end_mask_0, x = k_cast_fp16)[name = tensor("op_1102_cast_fp16")]; + tensor var_1112_begin_0 = const()[name = tensor("op_1112_begin_0"), val = tensor([0, 0, 0, 0])]; + tensor var_1112_end_0 = const()[name = tensor("op_1112_end_0"), val = tensor([1, 128, 1, 512])]; + tensor var_1112_end_mask_0 = const()[name = tensor("op_1112_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1112_cast_fp16 = slice_by_index(begin = var_1112_begin_0, end = var_1112_end_0, end_mask = var_1112_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1112_cast_fp16")]; + tensor var_1124_begin_0 = const()[name = tensor("op_1124_begin_0"), val = tensor([0, 128, 0, 0])]; + tensor var_1124_end_0 = const()[name = tensor("op_1124_end_0"), val = tensor([1, 256, 1, 512])]; + tensor var_1124_end_mask_0 = const()[name = tensor("op_1124_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1124_cast_fp16 = slice_by_index(begin = var_1124_begin_0, end = var_1124_end_0, end_mask = var_1124_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1124_cast_fp16")]; + tensor var_1136_begin_0 = const()[name = tensor("op_1136_begin_0"), val = tensor([0, 256, 0, 0])]; + tensor var_1136_end_0 = const()[name = tensor("op_1136_end_0"), val = tensor([1, 384, 1, 512])]; + tensor var_1136_end_mask_0 = const()[name = tensor("op_1136_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1136_cast_fp16 = slice_by_index(begin = var_1136_begin_0, end = var_1136_end_0, end_mask = var_1136_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1136_cast_fp16")]; + tensor var_1148_begin_0 = const()[name = tensor("op_1148_begin_0"), val = tensor([0, 384, 0, 0])]; + tensor var_1148_end_0 = const()[name = tensor("op_1148_end_0"), val = tensor([1, 512, 1, 512])]; + tensor var_1148_end_mask_0 = const()[name = tensor("op_1148_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1148_cast_fp16 = slice_by_index(begin = var_1148_begin_0, end = var_1148_end_0, end_mask = var_1148_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1148_cast_fp16")]; + tensor var_1160_begin_0 = const()[name = tensor("op_1160_begin_0"), val = tensor([0, 512, 0, 0])]; + tensor var_1160_end_0 = const()[name = tensor("op_1160_end_0"), val = tensor([1, 640, 1, 512])]; + tensor var_1160_end_mask_0 = const()[name = tensor("op_1160_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1160_cast_fp16 = slice_by_index(begin = var_1160_begin_0, end = var_1160_end_0, end_mask = var_1160_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1160_cast_fp16")]; + tensor var_1172_begin_0 = const()[name = tensor("op_1172_begin_0"), val = tensor([0, 640, 0, 0])]; + tensor var_1172_end_0 = const()[name = tensor("op_1172_end_0"), val = tensor([1, 768, 1, 512])]; + tensor var_1172_end_mask_0 = const()[name = tensor("op_1172_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1172_cast_fp16 = slice_by_index(begin = var_1172_begin_0, end = var_1172_end_0, end_mask = var_1172_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1172_cast_fp16")]; + tensor var_1184_begin_0 = const()[name = tensor("op_1184_begin_0"), val = tensor([0, 768, 0, 0])]; + tensor var_1184_end_0 = const()[name = tensor("op_1184_end_0"), val = tensor([1, 896, 1, 512])]; + tensor var_1184_end_mask_0 = const()[name = tensor("op_1184_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1184_cast_fp16 = slice_by_index(begin = var_1184_begin_0, end = var_1184_end_0, end_mask = var_1184_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1184_cast_fp16")]; + tensor var_1196_begin_0 = const()[name = tensor("op_1196_begin_0"), val = tensor([0, 896, 0, 0])]; + tensor var_1196_end_0 = const()[name = tensor("op_1196_end_0"), val = tensor([1, 1024, 1, 512])]; + tensor var_1196_end_mask_0 = const()[name = tensor("op_1196_end_mask_0"), val = tensor([true, false, true, true])]; + tensor var_1196_cast_fp16 = slice_by_index(begin = var_1196_begin_0, end = var_1196_end_0, end_mask = var_1196_end_mask_0, x = v_17_cast_fp16)[name = tensor("op_1196_cast_fp16")]; + tensor var_1208_equation_0 = const()[name = tensor("op_1208_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1208_cast_fp16 = einsum(equation = var_1208_equation_0, values = (var_1018_cast_fp16, var_920_cast_fp16))[name = tensor("op_1208_cast_fp16")]; + tensor var_1209_to_fp16 = const()[name = tensor("op_1209_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1210_cast_fp16 = mul(x = var_1208_cast_fp16, y = var_1209_to_fp16)[name = tensor("op_1210_cast_fp16")]; + tensor var_1212_equation_0 = const()[name = tensor("op_1212_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1212_cast_fp16 = einsum(equation = var_1212_equation_0, values = (var_1018_cast_fp16, var_924_cast_fp16))[name = tensor("op_1212_cast_fp16")]; + tensor var_1213_to_fp16 = const()[name = tensor("op_1213_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1214_cast_fp16 = mul(x = var_1212_cast_fp16, y = var_1213_to_fp16)[name = tensor("op_1214_cast_fp16")]; + tensor var_1216_equation_0 = const()[name = tensor("op_1216_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1216_cast_fp16 = einsum(equation = var_1216_equation_0, values = (var_1018_cast_fp16, var_928_cast_fp16))[name = tensor("op_1216_cast_fp16")]; + tensor var_1217_to_fp16 = const()[name = tensor("op_1217_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1218_cast_fp16 = mul(x = var_1216_cast_fp16, y = var_1217_to_fp16)[name = tensor("op_1218_cast_fp16")]; + tensor var_1220_equation_0 = const()[name = tensor("op_1220_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1220_cast_fp16 = einsum(equation = var_1220_equation_0, values = (var_1030_cast_fp16, var_932_cast_fp16))[name = tensor("op_1220_cast_fp16")]; + tensor var_1221_to_fp16 = const()[name = tensor("op_1221_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1222_cast_fp16 = mul(x = var_1220_cast_fp16, y = var_1221_to_fp16)[name = tensor("op_1222_cast_fp16")]; + tensor var_1224_equation_0 = const()[name = tensor("op_1224_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1224_cast_fp16 = einsum(equation = var_1224_equation_0, values = (var_1030_cast_fp16, var_936_cast_fp16))[name = tensor("op_1224_cast_fp16")]; + tensor var_1225_to_fp16 = const()[name = tensor("op_1225_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1226_cast_fp16 = mul(x = var_1224_cast_fp16, y = var_1225_to_fp16)[name = tensor("op_1226_cast_fp16")]; + tensor var_1228_equation_0 = const()[name = tensor("op_1228_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1228_cast_fp16 = einsum(equation = var_1228_equation_0, values = (var_1030_cast_fp16, var_940_cast_fp16))[name = tensor("op_1228_cast_fp16")]; + tensor var_1229_to_fp16 = const()[name = tensor("op_1229_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1230_cast_fp16 = mul(x = var_1228_cast_fp16, y = var_1229_to_fp16)[name = tensor("op_1230_cast_fp16")]; + tensor var_1232_equation_0 = const()[name = tensor("op_1232_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1232_cast_fp16 = einsum(equation = var_1232_equation_0, values = (var_1042_cast_fp16, var_944_cast_fp16))[name = tensor("op_1232_cast_fp16")]; + tensor var_1233_to_fp16 = const()[name = tensor("op_1233_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1234_cast_fp16 = mul(x = var_1232_cast_fp16, y = var_1233_to_fp16)[name = tensor("op_1234_cast_fp16")]; + tensor var_1236_equation_0 = const()[name = tensor("op_1236_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1236_cast_fp16 = einsum(equation = var_1236_equation_0, values = (var_1042_cast_fp16, var_948_cast_fp16))[name = tensor("op_1236_cast_fp16")]; + tensor var_1237_to_fp16 = const()[name = tensor("op_1237_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1238_cast_fp16 = mul(x = var_1236_cast_fp16, y = var_1237_to_fp16)[name = tensor("op_1238_cast_fp16")]; + tensor var_1240_equation_0 = const()[name = tensor("op_1240_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1240_cast_fp16 = einsum(equation = var_1240_equation_0, values = (var_1042_cast_fp16, var_952_cast_fp16))[name = tensor("op_1240_cast_fp16")]; + tensor var_1241_to_fp16 = const()[name = tensor("op_1241_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1242_cast_fp16 = mul(x = var_1240_cast_fp16, y = var_1241_to_fp16)[name = tensor("op_1242_cast_fp16")]; + tensor var_1244_equation_0 = const()[name = tensor("op_1244_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1244_cast_fp16 = einsum(equation = var_1244_equation_0, values = (var_1054_cast_fp16, var_956_cast_fp16))[name = tensor("op_1244_cast_fp16")]; + tensor var_1245_to_fp16 = const()[name = tensor("op_1245_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1246_cast_fp16 = mul(x = var_1244_cast_fp16, y = var_1245_to_fp16)[name = tensor("op_1246_cast_fp16")]; + tensor var_1248_equation_0 = const()[name = tensor("op_1248_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1248_cast_fp16 = einsum(equation = var_1248_equation_0, values = (var_1054_cast_fp16, var_960_cast_fp16))[name = tensor("op_1248_cast_fp16")]; + tensor var_1249_to_fp16 = const()[name = tensor("op_1249_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1250_cast_fp16 = mul(x = var_1248_cast_fp16, y = var_1249_to_fp16)[name = tensor("op_1250_cast_fp16")]; + tensor var_1252_equation_0 = const()[name = tensor("op_1252_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1252_cast_fp16 = einsum(equation = var_1252_equation_0, values = (var_1054_cast_fp16, var_964_cast_fp16))[name = tensor("op_1252_cast_fp16")]; + tensor var_1253_to_fp16 = const()[name = tensor("op_1253_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1254_cast_fp16 = mul(x = var_1252_cast_fp16, y = var_1253_to_fp16)[name = tensor("op_1254_cast_fp16")]; + tensor var_1256_equation_0 = const()[name = tensor("op_1256_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1256_cast_fp16 = einsum(equation = var_1256_equation_0, values = (var_1066_cast_fp16, var_968_cast_fp16))[name = tensor("op_1256_cast_fp16")]; + tensor var_1257_to_fp16 = const()[name = tensor("op_1257_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1258_cast_fp16 = mul(x = var_1256_cast_fp16, y = var_1257_to_fp16)[name = tensor("op_1258_cast_fp16")]; + tensor var_1260_equation_0 = const()[name = tensor("op_1260_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1260_cast_fp16 = einsum(equation = var_1260_equation_0, values = (var_1066_cast_fp16, var_972_cast_fp16))[name = tensor("op_1260_cast_fp16")]; + tensor var_1261_to_fp16 = const()[name = tensor("op_1261_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1262_cast_fp16 = mul(x = var_1260_cast_fp16, y = var_1261_to_fp16)[name = tensor("op_1262_cast_fp16")]; + tensor var_1264_equation_0 = const()[name = tensor("op_1264_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1264_cast_fp16 = einsum(equation = var_1264_equation_0, values = (var_1066_cast_fp16, var_976_cast_fp16))[name = tensor("op_1264_cast_fp16")]; + tensor var_1265_to_fp16 = const()[name = tensor("op_1265_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1266_cast_fp16 = mul(x = var_1264_cast_fp16, y = var_1265_to_fp16)[name = tensor("op_1266_cast_fp16")]; + tensor var_1268_equation_0 = const()[name = tensor("op_1268_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1268_cast_fp16 = einsum(equation = var_1268_equation_0, values = (var_1078_cast_fp16, var_980_cast_fp16))[name = tensor("op_1268_cast_fp16")]; + tensor var_1269_to_fp16 = const()[name = tensor("op_1269_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1270_cast_fp16 = mul(x = var_1268_cast_fp16, y = var_1269_to_fp16)[name = tensor("op_1270_cast_fp16")]; + tensor var_1272_equation_0 = const()[name = tensor("op_1272_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1272_cast_fp16 = einsum(equation = var_1272_equation_0, values = (var_1078_cast_fp16, var_984_cast_fp16))[name = tensor("op_1272_cast_fp16")]; + tensor var_1273_to_fp16 = const()[name = tensor("op_1273_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1274_cast_fp16 = mul(x = var_1272_cast_fp16, y = var_1273_to_fp16)[name = tensor("op_1274_cast_fp16")]; + tensor var_1276_equation_0 = const()[name = tensor("op_1276_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1276_cast_fp16 = einsum(equation = var_1276_equation_0, values = (var_1078_cast_fp16, var_988_cast_fp16))[name = tensor("op_1276_cast_fp16")]; + tensor var_1277_to_fp16 = const()[name = tensor("op_1277_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1278_cast_fp16 = mul(x = var_1276_cast_fp16, y = var_1277_to_fp16)[name = tensor("op_1278_cast_fp16")]; + tensor var_1280_equation_0 = const()[name = tensor("op_1280_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1280_cast_fp16 = einsum(equation = var_1280_equation_0, values = (var_1090_cast_fp16, var_992_cast_fp16))[name = tensor("op_1280_cast_fp16")]; + tensor var_1281_to_fp16 = const()[name = tensor("op_1281_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1282_cast_fp16 = mul(x = var_1280_cast_fp16, y = var_1281_to_fp16)[name = tensor("op_1282_cast_fp16")]; + tensor var_1284_equation_0 = const()[name = tensor("op_1284_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1284_cast_fp16 = einsum(equation = var_1284_equation_0, values = (var_1090_cast_fp16, var_996_cast_fp16))[name = tensor("op_1284_cast_fp16")]; + tensor var_1285_to_fp16 = const()[name = tensor("op_1285_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1286_cast_fp16 = mul(x = var_1284_cast_fp16, y = var_1285_to_fp16)[name = tensor("op_1286_cast_fp16")]; + tensor var_1288_equation_0 = const()[name = tensor("op_1288_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1288_cast_fp16 = einsum(equation = var_1288_equation_0, values = (var_1090_cast_fp16, var_1000_cast_fp16))[name = tensor("op_1288_cast_fp16")]; + tensor var_1289_to_fp16 = const()[name = tensor("op_1289_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1290_cast_fp16 = mul(x = var_1288_cast_fp16, y = var_1289_to_fp16)[name = tensor("op_1290_cast_fp16")]; + tensor var_1292_equation_0 = const()[name = tensor("op_1292_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1292_cast_fp16 = einsum(equation = var_1292_equation_0, values = (var_1102_cast_fp16, var_1004_cast_fp16))[name = tensor("op_1292_cast_fp16")]; + tensor var_1293_to_fp16 = const()[name = tensor("op_1293_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1294_cast_fp16 = mul(x = var_1292_cast_fp16, y = var_1293_to_fp16)[name = tensor("op_1294_cast_fp16")]; + tensor var_1296_equation_0 = const()[name = tensor("op_1296_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1296_cast_fp16 = einsum(equation = var_1296_equation_0, values = (var_1102_cast_fp16, var_1008_cast_fp16))[name = tensor("op_1296_cast_fp16")]; + tensor var_1297_to_fp16 = const()[name = tensor("op_1297_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1298_cast_fp16 = mul(x = var_1296_cast_fp16, y = var_1297_to_fp16)[name = tensor("op_1298_cast_fp16")]; + tensor var_1300_equation_0 = const()[name = tensor("op_1300_equation_0"), val = tensor("bkhc,bchq->bkhq")]; + tensor var_1300_cast_fp16 = einsum(equation = var_1300_equation_0, values = (var_1102_cast_fp16, var_1012_cast_fp16))[name = tensor("op_1300_cast_fp16")]; + tensor var_1301_to_fp16 = const()[name = tensor("op_1301_to_fp16"), val = tensor(0x1.6ap-4)]; + tensor var_1302_cast_fp16 = mul(x = var_1300_cast_fp16, y = var_1301_to_fp16)[name = tensor("op_1302_cast_fp16")]; + tensor aw_49_cast_fp16 = add(x = var_1210_cast_fp16, y = mask)[name = tensor("aw_49_cast_fp16")]; + tensor aw_51_cast_fp16 = add(x = var_1214_cast_fp16, y = mask)[name = tensor("aw_51_cast_fp16")]; + tensor aw_53_cast_fp16 = add(x = var_1218_cast_fp16, y = mask)[name = tensor("aw_53_cast_fp16")]; + tensor aw_55_cast_fp16 = add(x = var_1222_cast_fp16, y = mask)[name = tensor("aw_55_cast_fp16")]; + tensor aw_57_cast_fp16 = add(x = var_1226_cast_fp16, y = mask)[name = tensor("aw_57_cast_fp16")]; + tensor aw_59_cast_fp16 = add(x = var_1230_cast_fp16, y = mask)[name = tensor("aw_59_cast_fp16")]; + tensor aw_61_cast_fp16 = add(x = var_1234_cast_fp16, y = mask)[name = tensor("aw_61_cast_fp16")]; + tensor aw_63_cast_fp16 = add(x = var_1238_cast_fp16, y = mask)[name = tensor("aw_63_cast_fp16")]; + tensor aw_65_cast_fp16 = add(x = var_1242_cast_fp16, y = mask)[name = tensor("aw_65_cast_fp16")]; + tensor aw_67_cast_fp16 = add(x = var_1246_cast_fp16, y = mask)[name = tensor("aw_67_cast_fp16")]; + tensor aw_69_cast_fp16 = add(x = var_1250_cast_fp16, y = mask)[name = tensor("aw_69_cast_fp16")]; + tensor aw_71_cast_fp16 = add(x = var_1254_cast_fp16, y = mask)[name = tensor("aw_71_cast_fp16")]; + tensor aw_73_cast_fp16 = add(x = var_1258_cast_fp16, y = mask)[name = tensor("aw_73_cast_fp16")]; + tensor aw_75_cast_fp16 = add(x = var_1262_cast_fp16, y = mask)[name = tensor("aw_75_cast_fp16")]; + tensor aw_77_cast_fp16 = add(x = var_1266_cast_fp16, y = mask)[name = tensor("aw_77_cast_fp16")]; + tensor aw_79_cast_fp16 = add(x = var_1270_cast_fp16, y = mask)[name = tensor("aw_79_cast_fp16")]; + tensor aw_81_cast_fp16 = add(x = var_1274_cast_fp16, y = mask)[name = tensor("aw_81_cast_fp16")]; + tensor aw_83_cast_fp16 = add(x = var_1278_cast_fp16, y = mask)[name = tensor("aw_83_cast_fp16")]; + tensor aw_85_cast_fp16 = add(x = var_1282_cast_fp16, y = mask)[name = tensor("aw_85_cast_fp16")]; + tensor aw_87_cast_fp16 = add(x = var_1286_cast_fp16, y = mask)[name = tensor("aw_87_cast_fp16")]; + tensor aw_89_cast_fp16 = add(x = var_1290_cast_fp16, y = mask)[name = tensor("aw_89_cast_fp16")]; + tensor aw_91_cast_fp16 = add(x = var_1294_cast_fp16, y = mask)[name = tensor("aw_91_cast_fp16")]; + tensor aw_93_cast_fp16 = add(x = var_1298_cast_fp16, y = mask)[name = tensor("aw_93_cast_fp16")]; + tensor aw_cast_fp16 = add(x = var_1302_cast_fp16, y = mask)[name = tensor("aw_cast_fp16")]; + tensor var_1327_cast_fp16 = softmax(axis = var_779, x = aw_49_cast_fp16)[name = tensor("op_1327_cast_fp16")]; + tensor var_1328_cast_fp16 = softmax(axis = var_779, x = aw_51_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1329_cast_fp16 = softmax(axis = var_779, x = aw_53_cast_fp16)[name = tensor("op_1329_cast_fp16")]; + tensor var_1330_cast_fp16 = softmax(axis = var_779, x = aw_55_cast_fp16)[name = tensor("op_1330_cast_fp16")]; + tensor var_1331_cast_fp16 = softmax(axis = var_779, x = aw_57_cast_fp16)[name = tensor("op_1331_cast_fp16")]; + tensor var_1332_cast_fp16 = softmax(axis = var_779, x = aw_59_cast_fp16)[name = tensor("op_1332_cast_fp16")]; + tensor var_1333_cast_fp16 = softmax(axis = var_779, x = aw_61_cast_fp16)[name = tensor("op_1333_cast_fp16")]; + tensor var_1334_cast_fp16 = softmax(axis = var_779, x = aw_63_cast_fp16)[name = tensor("op_1334_cast_fp16")]; + tensor var_1335_cast_fp16 = softmax(axis = var_779, x = aw_65_cast_fp16)[name = tensor("op_1335_cast_fp16")]; + tensor var_1336_cast_fp16 = softmax(axis = var_779, x = aw_67_cast_fp16)[name = tensor("op_1336_cast_fp16")]; + tensor var_1337_cast_fp16 = softmax(axis = var_779, x = aw_69_cast_fp16)[name = tensor("op_1337_cast_fp16")]; + tensor var_1338_cast_fp16 = softmax(axis = var_779, x = aw_71_cast_fp16)[name = tensor("op_1338_cast_fp16")]; + tensor var_1339_cast_fp16 = softmax(axis = var_779, x = aw_73_cast_fp16)[name = tensor("op_1339_cast_fp16")]; + tensor var_1340_cast_fp16 = softmax(axis = var_779, x = aw_75_cast_fp16)[name = tensor("op_1340_cast_fp16")]; + tensor var_1341_cast_fp16 = softmax(axis = var_779, x = aw_77_cast_fp16)[name = tensor("op_1341_cast_fp16")]; + tensor var_1342_cast_fp16 = softmax(axis = var_779, x = aw_79_cast_fp16)[name = tensor("op_1342_cast_fp16")]; + tensor var_1343_cast_fp16 = softmax(axis = var_779, x = aw_81_cast_fp16)[name = tensor("op_1343_cast_fp16")]; + tensor var_1344_cast_fp16 = softmax(axis = var_779, x = aw_83_cast_fp16)[name = tensor("op_1344_cast_fp16")]; + tensor var_1345_cast_fp16 = softmax(axis = var_779, x = aw_85_cast_fp16)[name = tensor("op_1345_cast_fp16")]; + tensor var_1346_cast_fp16 = softmax(axis = var_779, x = aw_87_cast_fp16)[name = tensor("op_1346_cast_fp16")]; + tensor var_1347_cast_fp16 = softmax(axis = var_779, x = aw_89_cast_fp16)[name = tensor("op_1347_cast_fp16")]; + tensor var_1348_cast_fp16 = softmax(axis = var_779, x = aw_91_cast_fp16)[name = tensor("op_1348_cast_fp16")]; + tensor var_1349_cast_fp16 = softmax(axis = var_779, x = aw_93_cast_fp16)[name = tensor("op_1349_cast_fp16")]; + tensor var_1350_cast_fp16 = softmax(axis = var_779, x = aw_cast_fp16)[name = tensor("op_1350_cast_fp16")]; + tensor var_1352_equation_0 = const()[name = tensor("op_1352_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1352_cast_fp16 = einsum(equation = var_1352_equation_0, values = (var_1112_cast_fp16, var_1327_cast_fp16))[name = tensor("op_1352_cast_fp16")]; + tensor var_1354_equation_0 = const()[name = tensor("op_1354_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1354_cast_fp16 = einsum(equation = var_1354_equation_0, values = (var_1112_cast_fp16, var_1328_cast_fp16))[name = tensor("op_1354_cast_fp16")]; + tensor var_1356_equation_0 = const()[name = tensor("op_1356_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1356_cast_fp16 = einsum(equation = var_1356_equation_0, values = (var_1112_cast_fp16, var_1329_cast_fp16))[name = tensor("op_1356_cast_fp16")]; + tensor var_1358_equation_0 = const()[name = tensor("op_1358_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1358_cast_fp16 = einsum(equation = var_1358_equation_0, values = (var_1124_cast_fp16, var_1330_cast_fp16))[name = tensor("op_1358_cast_fp16")]; + tensor var_1360_equation_0 = const()[name = tensor("op_1360_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1360_cast_fp16 = einsum(equation = var_1360_equation_0, values = (var_1124_cast_fp16, var_1331_cast_fp16))[name = tensor("op_1360_cast_fp16")]; + tensor var_1362_equation_0 = const()[name = tensor("op_1362_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1362_cast_fp16 = einsum(equation = var_1362_equation_0, values = (var_1124_cast_fp16, var_1332_cast_fp16))[name = tensor("op_1362_cast_fp16")]; + tensor var_1364_equation_0 = const()[name = tensor("op_1364_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1364_cast_fp16 = einsum(equation = var_1364_equation_0, values = (var_1136_cast_fp16, var_1333_cast_fp16))[name = tensor("op_1364_cast_fp16")]; + tensor var_1366_equation_0 = const()[name = tensor("op_1366_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1366_cast_fp16 = einsum(equation = var_1366_equation_0, values = (var_1136_cast_fp16, var_1334_cast_fp16))[name = tensor("op_1366_cast_fp16")]; + tensor var_1368_equation_0 = const()[name = tensor("op_1368_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1368_cast_fp16 = einsum(equation = var_1368_equation_0, values = (var_1136_cast_fp16, var_1335_cast_fp16))[name = tensor("op_1368_cast_fp16")]; + tensor var_1370_equation_0 = const()[name = tensor("op_1370_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1370_cast_fp16 = einsum(equation = var_1370_equation_0, values = (var_1148_cast_fp16, var_1336_cast_fp16))[name = tensor("op_1370_cast_fp16")]; + tensor var_1372_equation_0 = const()[name = tensor("op_1372_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1372_cast_fp16 = einsum(equation = var_1372_equation_0, values = (var_1148_cast_fp16, var_1337_cast_fp16))[name = tensor("op_1372_cast_fp16")]; + tensor var_1374_equation_0 = const()[name = tensor("op_1374_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1374_cast_fp16 = einsum(equation = var_1374_equation_0, values = (var_1148_cast_fp16, var_1338_cast_fp16))[name = tensor("op_1374_cast_fp16")]; + tensor var_1376_equation_0 = const()[name = tensor("op_1376_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1376_cast_fp16 = einsum(equation = var_1376_equation_0, values = (var_1160_cast_fp16, var_1339_cast_fp16))[name = tensor("op_1376_cast_fp16")]; + tensor var_1378_equation_0 = const()[name = tensor("op_1378_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1378_cast_fp16 = einsum(equation = var_1378_equation_0, values = (var_1160_cast_fp16, var_1340_cast_fp16))[name = tensor("op_1378_cast_fp16")]; + tensor var_1380_equation_0 = const()[name = tensor("op_1380_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1380_cast_fp16 = einsum(equation = var_1380_equation_0, values = (var_1160_cast_fp16, var_1341_cast_fp16))[name = tensor("op_1380_cast_fp16")]; + tensor var_1382_equation_0 = const()[name = tensor("op_1382_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1382_cast_fp16 = einsum(equation = var_1382_equation_0, values = (var_1172_cast_fp16, var_1342_cast_fp16))[name = tensor("op_1382_cast_fp16")]; + tensor var_1384_equation_0 = const()[name = tensor("op_1384_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1384_cast_fp16 = einsum(equation = var_1384_equation_0, values = (var_1172_cast_fp16, var_1343_cast_fp16))[name = tensor("op_1384_cast_fp16")]; + tensor var_1386_equation_0 = const()[name = tensor("op_1386_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1386_cast_fp16 = einsum(equation = var_1386_equation_0, values = (var_1172_cast_fp16, var_1344_cast_fp16))[name = tensor("op_1386_cast_fp16")]; + tensor var_1388_equation_0 = const()[name = tensor("op_1388_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1388_cast_fp16 = einsum(equation = var_1388_equation_0, values = (var_1184_cast_fp16, var_1345_cast_fp16))[name = tensor("op_1388_cast_fp16")]; + tensor var_1390_equation_0 = const()[name = tensor("op_1390_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1390_cast_fp16 = einsum(equation = var_1390_equation_0, values = (var_1184_cast_fp16, var_1346_cast_fp16))[name = tensor("op_1390_cast_fp16")]; + tensor var_1392_equation_0 = const()[name = tensor("op_1392_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1392_cast_fp16 = einsum(equation = var_1392_equation_0, values = (var_1184_cast_fp16, var_1347_cast_fp16))[name = tensor("op_1392_cast_fp16")]; + tensor var_1394_equation_0 = const()[name = tensor("op_1394_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1394_cast_fp16 = einsum(equation = var_1394_equation_0, values = (var_1196_cast_fp16, var_1348_cast_fp16))[name = tensor("op_1394_cast_fp16")]; + tensor var_1396_equation_0 = const()[name = tensor("op_1396_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1396_cast_fp16 = einsum(equation = var_1396_equation_0, values = (var_1196_cast_fp16, var_1349_cast_fp16))[name = tensor("op_1396_cast_fp16")]; + tensor var_1398_equation_0 = const()[name = tensor("op_1398_equation_0"), val = tensor("bchk,bkhq->bchq")]; + tensor var_1398_cast_fp16 = einsum(equation = var_1398_equation_0, values = (var_1196_cast_fp16, var_1350_cast_fp16))[name = tensor("op_1398_cast_fp16")]; + tensor x_27_interleave_0 = const()[name = tensor("x_27_interleave_0"), val = tensor(false)]; + tensor x_27_cast_fp16 = concat(axis = var_779, interleave = x_27_interleave_0, values = (var_1352_cast_fp16, var_1354_cast_fp16, var_1356_cast_fp16, var_1358_cast_fp16, var_1360_cast_fp16, var_1362_cast_fp16, var_1364_cast_fp16, var_1366_cast_fp16, var_1368_cast_fp16, var_1370_cast_fp16, var_1372_cast_fp16, var_1374_cast_fp16, var_1376_cast_fp16, var_1378_cast_fp16, var_1380_cast_fp16, var_1382_cast_fp16, var_1384_cast_fp16, var_1386_cast_fp16, var_1388_cast_fp16, var_1390_cast_fp16, var_1392_cast_fp16, var_1394_cast_fp16, var_1396_cast_fp16, var_1398_cast_fp16))[name = tensor("x_27_cast_fp16")]; + tensor var_1403 = const()[name = tensor("op_1403"), val = tensor([1, 3072, -1, 8])]; + tensor input_13_cast_fp16 = reshape(shape = var_1403, x = x_27_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor var_1406 = const()[name = tensor("op_1406"), val = tensor([1, 1])]; + tensor var_1408 = const()[name = tensor("op_1408"), val = tensor([1, 1])]; + tensor attention_output_pad_type_0 = const()[name = tensor("attention_output_pad_type_0"), val = tensor("custom")]; + tensor attention_output_pad_0 = const()[name = tensor("attention_output_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_attn_proj_weight_to_fp16 = const()[name = tensor("blocks_1_attn_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(232803776)))]; + tensor attention_output_cast_fp16 = conv(dilations = var_1408, groups = var_779, pad = attention_output_pad_0, pad_type = attention_output_pad_type_0, strides = var_1406, weight = blocks_1_attn_proj_weight_to_fp16, x = input_13_cast_fp16)[name = tensor("attention_output_cast_fp16")]; + tensor x_29_cast_fp16 = add(x = attention_output_cast_fp16, y = x_17_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_eps_interleave_0 = const()[name = tensor("x_eps_interleave_0"), val = tensor(false)]; + tensor eps_chan_to_fp16 = const()[name = tensor("eps_chan_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678208)))]; + tensor x_eps_cast_fp16 = concat(axis = var_779, interleave = x_eps_interleave_0, values = (x_29_cast_fp16, eps_chan_to_fp16))[name = tensor("x_eps_cast_fp16")]; + tensor norm_x_axes_0 = const()[name = tensor("norm_x_axes_0"), val = tensor([1])]; + tensor norm_x_cast_fp16 = reduce_l2_norm(axes = norm_x_axes_0, keep_dims = var_782, x = x_eps_cast_fp16)[name = tensor("norm_x_cast_fp16")]; + tensor x_normed_19_cast_fp16 = real_div(x = x_29_cast_fp16, y = norm_x_cast_fp16)[name = tensor("x_normed_19_cast_fp16")]; + tensor var_1434_to_fp16 = const()[name = tensor("op_1434_to_fp16"), val = tensor(0x1.bb8p+5)]; + tensor x_normed_21_cast_fp16 = mul(x = x_normed_19_cast_fp16, y = var_1434_to_fp16)[name = tensor("x_normed_21_cast_fp16")]; + tensor blocks_1_norm_2_weight_to_fp16 = const()[name = tensor("blocks_1_norm_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251678400)))]; + tensor input_15_cast_fp16 = mul(x = x_normed_21_cast_fp16, y = blocks_1_norm_2_weight_to_fp16)[name = tensor("input_15_cast_fp16")]; + tensor var_1445 = const()[name = tensor("op_1445"), val = tensor([1, 1])]; + tensor var_1447 = const()[name = tensor("op_1447"), val = tensor([1, 1])]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_1_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_1_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(251684608)))]; + tensor input_17_cast_fp16 = conv(dilations = var_1447, groups = var_779, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = var_1445, weight = blocks_1_mlp_fc_1_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor var_1451 = const()[name = tensor("op_1451"), val = tensor([1, 1])]; + tensor var_1453 = const()[name = tensor("op_1453"), val = tensor([1, 1])]; + tensor x_fc_2_pad_type_0 = const()[name = tensor("x_fc_2_pad_type_0"), val = tensor("custom")]; + tensor x_fc_2_pad_0 = const()[name = tensor("x_fc_2_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_fc_2_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_fc_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(302016320)))]; + tensor x_fc_2_cast_fp16 = conv(dilations = var_1453, groups = var_779, pad = x_fc_2_pad_0, pad_type = x_fc_2_pad_type_0, strides = var_1451, weight = blocks_1_mlp_fc_2_weight_to_fp16, x = input_15_cast_fp16)[name = tensor("x_fc_2_cast_fp16")]; + tensor var_1456_cast_fp16 = silu(x = input_17_cast_fp16)[name = tensor("op_1456_cast_fp16")]; + tensor input_cast_fp16 = mul(x = var_1456_cast_fp16, y = x_fc_2_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor var_1459 = const()[name = tensor("op_1459"), val = tensor([1, 1])]; + tensor var_1461 = const()[name = tensor("op_1461"), val = tensor([1, 1])]; + tensor var_1463_pad_type_0 = const()[name = tensor("op_1463_pad_type_0"), val = tensor("custom")]; + tensor var_1463_pad_0 = const()[name = tensor("op_1463_pad_0"), val = tensor([0, 0, 0, 0])]; + tensor blocks_1_mlp_proj_weight_to_fp16 = const()[name = tensor("blocks_1_mlp_proj_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(352348032)))]; + tensor var_1463_cast_fp16 = conv(dilations = var_1461, groups = var_779, pad = var_1463_pad_0, pad_type = var_1463_pad_type_0, strides = var_1459, weight = blocks_1_mlp_proj_weight_to_fp16, x = input_cast_fp16)[name = tensor("op_1463_cast_fp16")]; + tensor new_x = add(x = var_1463_cast_fp16, y = x_29_cast_fp16)[name = tensor("op_1464_cast_fp16")]; + } -> (new_x, new_k_cache_0, new_v_cache_0, new_k_cache_1, new_v_cache_1); +} \ No newline at end of file