#!/usr/bin/env python
# coding: utf-8
"""Convert an ONNX model to an RKNN model for the rk3588 target.

The script configures an ``RKNN`` object, loads ``ONNX_MODEL``, builds it
(quantization is controlled by ``QUANTIZE`` / ``DATASET``) and exports the
result to ``RKNN_MODEL``. Each stage's return code is checked and the script
stops at the first failing stage instead of continuing with a broken state.
"""

import datetime
from rknn.api import RKNN
from sys import exit

# Input model and the output path derived from it (same name, .rknn suffix).
ONNX_MODEL = "RWKV-x070-World-0.1B-v2.8-20241210-ctx4096.onnx"
RKNN_MODEL = ONNX_MODEL.replace(".onnx", ".rknn")

# Dataset path handed to rknn.build(); left empty because QUANTIZE is False.
DATASET = ""
QUANTIZE = False

# Only referenced by the commented-out init_runtime() call at the bottom.
detailed_performance_log = True

# Timestamp embedded into the exported model through `custom_string`.
timedate_iso = datetime.datetime.now().isoformat()

rknn = RKNN(verbose=True)


def _abort_on_failure(ret, step):
    """Release the RKNN object and exit with ``ret`` if ``step`` failed.

    RKNN-Toolkit2 documents its API calls as returning 0 on success, so any
    other value is treated as a failure of the named stage.
    """
    if ret != 0:
        print(f"{step} failed (ret={ret})")
        rknn.release()
        exit(ret)


rknn.config(
    # mean_values=[x * 255 for x in [0.485, 0.456, 0.406]],
    # std_values=[x * 255 for x in [0.229, 0.224, 0.225]],
    quantized_dtype="w8a8",
    quantized_algorithm="normal",
    quantized_method="channel",
    quantized_hybrid_level=0,
    target_platform="rk3588",
    quant_img_RGB2BGR=False,
    float_dtype="float16",
    optimization_level=3,
    custom_string=f"converted at {timedate_iso}",
    remove_weight=False,
    compress_weight=False,
    inputs_yuv_fmt=None,
    single_core_mode=False,
    dynamic_input=None,
    model_pruning=False,
    op_target=None,
    quantize_weight=False,
    remove_reshape=False,
    sparse_infer=False,
    enable_flash_attention=False,
    # Hidden (undocumented) parameters:
    # disable_rules=[],
    # sram_prefer=False,
    # nbuf_prefer=False,
    # check_data=[],
)

ret = rknn.load_onnx(model=ONNX_MODEL)
_abort_on_failure(ret, "load_onnx")

ret = rknn.build(do_quantization=QUANTIZE, dataset=DATASET, rknn_batch_size=None)
_abort_on_failure(ret, "build")

ret = rknn.export_rknn(RKNN_MODEL)
_abort_on_failure(ret, "export_rknn")

# Optional on-device profiling / accuracy analysis, kept disabled.
# NOTE(review): the accuracy_analysis line uses `device_id`, which is not
# defined anywhere in this script - define it before enabling that line.
# ret = rknn.init_runtime(target='rk3588',device_id='cbb956772bf5dac9',core_mask=RKNN.NPU_CORE_0,perf_debug=detailed_performance_log)
# rknn.eval_perf()
# ret = rknn.accuracy_analysis(inputs=['../embeddings.npy','../state.npy','../scale_ratio.npy'], target='rk3588', device_id=device_id)

# Free the toolkit's resources once the conversion is finished.
rknn.release()