#ifndef LYRA_CODEC_SPARSE_MATMUL_COMPUTE_MATMUL_FIXED_AVX2_H_
#define LYRA_CODEC_SPARSE_MATMUL_COMPUTE_MATMUL_FIXED_AVX2_H_

#include <cstdint>

namespace csrblocksparse {
namespace detail {

// Fixed-point sparse matrix * vector product over a matrix stored as 4x4
// blocks, computed with AVX2 intrinsics. Block rows [|start_row|, |end_row|)
// are processed: |nnz_per_row| gives the number of non-zero 4x4 blocks in
// each block row, |weights_ptr| supplies their 16-bit weights, and
// |rhs_indices| selects, per block, which 4-element span of |rhs| it
// multiplies. The 32-bit accumulators start from |bias|, are shifted right
// by |shift_out|, and are clamped below at zero when |relu| is true. The
// 16-bit result is written to |replicas| copies of |output|, spaced |stride|
// elements apart.
void MatVec4x4FixedAVX2(const int16_t* weights_ptr, const int16_t* rhs,
                        const int32_t* bias, const int32_t* nnz_per_row,
                        const int16_t* rhs_indices, int start_row, int end_row,
                        bool relu, int shift_out, int replicas, int stride,
                        int16_t* output);

// Overload of MatVec4x4FixedAVX2 that writes 32-bit results to |output|
// instead of narrowing to 16 bits.
void MatVec4x4FixedAVX2(const int16_t* weights_ptr, const int16_t* rhs,
                        const int32_t* bias, const int32_t* nnz_per_row,
                        const int16_t* rhs_indices, int start_row, int end_row,
                        bool relu, int shift_out, int replicas, int stride,
                        int32_t* output);
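
// A hedged calling sketch for the overloads above. The buffer names are
// hypothetical placeholders for data already packed into the block-CSR
// layout by the surrounding library; with |replicas| = 3 and |stride| =
// output_size, the same result lands in three consecutive copies of the
// output buffer (e.g. one per consumer thread):
//
//   MatVec4x4FixedAVX2(packed_weights, rhs_vector, bias, nnz_per_row,
//                      rhs_indices, /*start_row=*/0,
//                      /*end_row=*/num_rows / 4, /*relu=*/true,
//                      /*shift_out=*/15, /*replicas=*/3,
//                      /*stride=*/output_size, out_buffer);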

// As above, but for a matrix stored as 8x4 blocks. This variant writes a
// single 32-bit copy of the result, so it takes no |replicas| or |stride|
// arguments.
void MatVec8x4FixedAVX2(const int16_t* weights_ptr, const int16_t* rhs,
                        const int32_t* bias, const int32_t* nnz_per_row,
                        const int16_t* rhs_indices, int start_row, int end_row,
                        bool relu, int shift_out, int32_t* output);
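
// For reference, a scalar sketch of the contract the 4x4 kernels implement,
// assuming row-major weights within each block and |bias| indexed by output
// row. This illustrates the semantics documented above; it is not the
// library's actual fallback path, whose packed layouts may differ:
//
//   void MatVec4x4FixedScalar(const int16_t* weights, const int16_t* rhs,
//                             const int32_t* bias, const int32_t* nnz_per_row,
//                             const int16_t* rhs_indices, int start_row,
//                             int end_row, bool relu, int shift_out,
//                             int replicas, int stride, int16_t* output) {
//     for (int block_row = start_row; block_row < end_row; ++block_row) {
//       int32_t accum[4];
//       for (int i = 0; i < 4; ++i) accum[i] = bias[block_row * 4 + i];
//       // Accumulate each non-zero 4x4 block against its 4-entry rhs span.
//       int num_blocks = *nnz_per_row++;
//       for (int c = 0; c < num_blocks; ++c) {
//         const int16_t* block_rhs = rhs + 4 * (*rhs_indices++);
//         for (int i = 0; i < 4; ++i) {
//           for (int j = 0; j < 4; ++j) {
//             accum[i] += static_cast<int32_t>(*weights++) * block_rhs[j];
//           }
//         }
//       }
//       // Shift, optionally apply ReLU, and replicate the 4 outputs.
//       for (int r = 0; r < replicas; ++r) {
//         for (int i = 0; i < 4; ++i) {
//           int32_t v = accum[i] >> shift_out;
//           if (relu && v < 0) v = 0;
//           output[r * stride + block_row * 4 + i] = static_cast<int16_t>(v);
//         }
//       }
//     }
//   }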

}  // namespace detail
}  // namespace csrblocksparse

#endif  // LYRA_CODEC_SPARSE_MATMUL_COMPUTE_MATMUL_FIXED_AVX2_H_