[feat][nmsis] add nmsis component and nn,dsp demo

This commit is contained in:
jzlv 2021-09-26 13:38:51 +08:00
parent b2aada479b
commit 5d1126d0f0
989 changed files with 286224 additions and 0 deletions

View file

@ -0,0 +1,5 @@
# Build description for the conv demo.
# NOTE(review): the TARGET_REQUIRED_* and `mains` variables are presumably
# consumed by the project-provided generate_bin() call below -- confirm against
# the SDK's CMake helpers.
# Private include directories: this directory and its parent.
set(TARGET_REQUIRED_PRIVATE_INCLUDE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/..)
# Source file listed alongside the main file (the reference convolution code).
set(TARGET_REQUIRED_SRCS conv.c)
# Library name listed as a requirement of this target.
set(TARGET_REQUIRED_LIBS nmsis)
# File that provides main().
set(mains main.c)
# Project-defined command; not defined in this file.
generate_bin()

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,272 @@
#include "ref.h"
/**
 * Reference (direct-form, unoptimized) linear convolution of two float32_t
 * sequences.
 *
 * For every output index i in [0, srcALen + srcBLen - 1) the function stores
 *     pDst[i] = sum of pSrcB[i - j] * pSrcA[j]
 * over all j in [0, i] that satisfy j < srcALen and i - j < srcBLen.
 * Products are accumulated in ascending j order in a float32_t accumulator.
 *
 * @param pSrcA    first input sequence
 * @param srcALen  number of samples in pSrcA
 * @param pSrcB    second input sequence
 * @param srcBLen  number of samples in pSrcB
 * @param pDst     output buffer; receives srcALen + srcBLen - 1 samples
 */
void ref_conv_f32(float32_t *pSrcA, uint32_t srcALen, float32_t *pSrcB,
                  uint32_t srcBLen, float32_t *pDst)
{
    uint32_t outIdx, tap; /* output sample index, index into pSrcA */

    /* With both lengths zero, srcALen + srcBLen - 1 wraps to UINT32_MAX and
     * the loop below would write far beyond pDst. There is no output. */
    if (srcALen == 0U && srcBLen == 0U) {
        return;
    }

    for (outIdx = 0; outIdx < srcALen + srcBLen - 1; outIdx++) {
        float32_t acc = 0.0f; /* MAC accumulator for this output sample */

        for (tap = 0; tap <= outIdx; tap++) {
            /* Only pairs that lie inside both input arrays contribute */
            if ((outIdx - tap < srcBLen) && (tap < srcALen)) {
                acc += pSrcB[outIdx - tap] * pSrcA[tap];
            }
        }

        pDst[outIdx] = acc;
    }
}
/**
 * Reference counterpart for the partial float32_t convolution.
 *
 * This implementation does not restrict the computation to a window: it
 * forwards to ref_conv_f32(), which fills the whole output, and the
 * firstIndex / numPoints arguments are not used.
 *
 * @return always RISCV_MATH_SUCCESS
 */
riscv_status ref_conv_partial_f32(float32_t *pSrcA, uint32_t srcALen,
                                  float32_t *pSrcB, uint32_t srcBLen,
                                  float32_t *pDst, uint32_t firstIndex,
                                  uint32_t numPoints)
{
    /* Window arguments are accepted but intentionally unused */
    (void)firstIndex;
    (void)numPoints;

    ref_conv_f32(pSrcA, srcALen, pSrcB, srcBLen, pDst);

    return RISCV_MATH_SUCCESS;
}
/**
 * Reference (direct-form, unoptimized) linear convolution of two q31_t
 * sequences.
 *
 * Each output sample accumulates the products pSrcA[j] * pSrcB[i - j] in a
 * q63_t accumulator; the accumulator is then shifted right by 31 bits and
 * converted to q31_t (no explicit saturation is applied here).
 *
 * @param pSrcA    first input sequence
 * @param srcALen  number of samples in pSrcA
 * @param pSrcB    second input sequence
 * @param srcBLen  number of samples in pSrcB
 * @param pDst     output buffer; receives srcALen + srcBLen - 1 samples
 */
void ref_conv_q31(q31_t *pSrcA, uint32_t srcALen, q31_t *pSrcB,
                  uint32_t srcBLen, q31_t *pDst)
{
    uint32_t outIdx, tap; /* output sample index, index into pSrcA */

    /* With both lengths zero, srcALen + srcBLen - 1 wraps to UINT32_MAX and
     * the loop below would write far beyond pDst. There is no output. */
    if (srcALen == 0U && srcBLen == 0U) {
        return;
    }

    for (outIdx = 0; outIdx < srcALen + srcBLen - 1; outIdx++) {
        q63_t acc = 0; /* 64-bit MAC accumulator for this output sample */

        for (tap = 0; tap <= outIdx; tap++) {
            /* Only pairs that lie inside both input arrays contribute */
            if ((outIdx - tap < srcBLen) && (tap < srcALen)) {
                acc += (q63_t)pSrcA[tap] * (pSrcB[outIdx - tap]);
            }
        }

        /* Scale the accumulated products back down by 31 bits */
        pDst[outIdx] = (q31_t)(acc >> 31U);
    }
}
/**
 * Reference for the "fast" q31_t convolution.
 *
 * Unlike ref_conv_q31(), the accumulator is only 32 bits wide: after every
 * multiply-accumulate only the upper 32 bits of (sum * 2^32 + product) are
 * kept. The final accumulator is shifted left by one bit before being stored.
 *
 * The left shifts are carried out on unsigned types: shifting a negative
 * signed value left is undefined behaviour in C, whereas the unsigned form
 * yields the same bit pattern with defined semantics.
 *
 * @param pSrcA    first input sequence
 * @param srcALen  number of samples in pSrcA
 * @param pSrcB    second input sequence
 * @param srcBLen  number of samples in pSrcB
 * @param pDst     output buffer; receives srcALen + srcBLen - 1 samples
 */
void ref_conv_fast_q31(q31_t *pSrcA, uint32_t srcALen, q31_t *pSrcB,
                       uint32_t srcBLen, q31_t *pDst)
{
    uint32_t outIdx, tap; /* output sample index, index into pSrcA */

    /* With both lengths zero, srcALen + srcBLen - 1 wraps to UINT32_MAX and
     * the loop below would write far beyond pDst. There is no output. */
    if (srcALen == 0U && srcBLen == 0U) {
        return;
    }

    for (outIdx = 0; outIdx < srcALen + srcBLen - 1; outIdx++) {
        q31_t sum = 0; /* 32-bit accumulator (upper word of the running MAC) */

        for (tap = 0; tap <= outIdx; tap++) {
            /* Only pairs that lie inside both input arrays contribute */
            if ((outIdx - tap < srcBLen) && (tap < srcALen)) {
                /* sum placed in the upper 32 bits, built without shifting a
                 * negative signed value */
                uint64_t upper = (uint64_t)(q63_t)sum << 32;
                uint64_t product =
                    (uint64_t)((q63_t)pSrcA[tap] * pSrcB[outIdx - tap]);

                /* Keep only the upper word of the 64-bit result */
                sum = (q31_t)((q63_t)(upper + product) >> 32);
            }
        }

        /* Final scaling by one bit, done on the unsigned bit pattern */
        pDst[outIdx] = (q31_t)((uint32_t)sum << 1U);
    }
}
/**
 * Reference counterpart for the partial q31_t convolution.
 *
 * Forwards to ref_conv_q31(), which fills the whole output; the
 * firstIndex / numPoints arguments are not used.
 *
 * @return always RISCV_MATH_SUCCESS
 */
riscv_status ref_conv_partial_q31(q31_t *pSrcA, uint32_t srcALen, q31_t *pSrcB,
                                  uint32_t srcBLen, q31_t *pDst,
                                  uint32_t firstIndex, uint32_t numPoints)
{
    /* Window arguments are accepted but intentionally unused */
    (void)firstIndex;
    (void)numPoints;

    ref_conv_q31(pSrcA, srcALen, pSrcB, srcBLen, pDst);

    return RISCV_MATH_SUCCESS;
}
/**
 * Reference counterpart for the partial "fast" q31_t convolution.
 *
 * Forwards to ref_conv_fast_q31(), which fills the whole output; the
 * firstIndex / numPoints arguments are not used.
 *
 * @return always RISCV_MATH_SUCCESS
 */
riscv_status ref_conv_partial_fast_q31(q31_t *pSrcA, uint32_t srcALen,
                                       q31_t *pSrcB, uint32_t srcBLen,
                                       q31_t *pDst, uint32_t firstIndex,
                                       uint32_t numPoints)
{
    /* Window arguments are accepted but intentionally unused */
    (void)firstIndex;
    (void)numPoints;

    ref_conv_fast_q31(pSrcA, srcALen, pSrcB, srcBLen, pDst);

    return RISCV_MATH_SUCCESS;
}
/**
 * Reference (direct-form, unoptimized) linear convolution of two q15_t
 * sequences.
 *
 * Each product is computed after widening pSrcA[j] to q31_t and is added to a
 * q63_t accumulator. The accumulator is shifted right by 15 bits and passed
 * through ref_sat_q15() (defined outside this block; presumably saturates to
 * the q15_t range -- confirm in the helper source) before being stored.
 *
 * @param pSrcA    first input sequence
 * @param srcALen  number of samples in pSrcA
 * @param pSrcB    second input sequence
 * @param srcBLen  number of samples in pSrcB
 * @param pDst     output buffer; receives srcALen + srcBLen - 1 samples
 */
void ref_conv_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB,
                  uint32_t srcBLen, q15_t *pDst)
{
    uint32_t outIdx, tap; /* output sample index, index into pSrcA */

    /* With both lengths zero, srcALen + srcBLen - 1 wraps to UINT32_MAX and
     * the loop below would write far beyond pDst. There is no output. */
    if (srcALen == 0U && srcBLen == 0U) {
        return;
    }

    for (outIdx = 0; outIdx < srcALen + srcBLen - 1; outIdx++) {
        q63_t acc = 0; /* 64-bit MAC accumulator for this output sample */

        for (tap = 0; tap <= outIdx; tap++) {
            /* Only pairs that lie inside both input arrays contribute */
            if ((outIdx - tap < srcBLen) && (tap < srcALen)) {
                acc += (q31_t)pSrcA[tap] * pSrcB[outIdx - tap];
            }
        }

        pDst[outIdx] = ref_sat_q15(acc >> 15U);
    }
}
/**
 * Reference counterpart for the partial "fast opt" q15_t convolution.
 *
 * Computes the complete convolution (all srcALen + srcBLen - 1 samples) with
 * a q31_t accumulator; each result is shifted right by 15 bits and passed
 * through ref_sat_q15() (defined outside this block). The firstIndex,
 * numPoints, pScratch1 and pScratch2 arguments are not used.
 *
 * @param pSrcA      first input sequence
 * @param srcALen    number of samples in pSrcA
 * @param pSrcB      second input sequence
 * @param srcBLen    number of samples in pSrcB
 * @param pDst       output buffer; receives srcALen + srcBLen - 1 samples
 * @param firstIndex unused
 * @param numPoints  unused
 * @param pScratch1  unused
 * @param pScratch2  unused
 * @return always RISCV_MATH_SUCCESS
 */
riscv_status ref_conv_partial_fast_opt_q15(q15_t *pSrcA, uint32_t srcALen,
                                           q15_t *pSrcB, uint32_t srcBLen,
                                           q15_t *pDst, uint32_t firstIndex,
                                           uint32_t numPoints, q15_t *pScratch1,
                                           q15_t *pScratch2)
{
    uint32_t outIdx, tap; /* output sample index, index into pSrcA */

    (void)firstIndex;
    (void)numPoints;
    (void)pScratch1;
    (void)pScratch2;

    /* With both lengths zero, srcALen + srcBLen - 1 wraps to UINT32_MAX and
     * the loop below would write far beyond pDst. There is no output. */
    if (srcALen == 0U && srcBLen == 0U) {
        return RISCV_MATH_SUCCESS;
    }

    for (outIdx = 0; outIdx < srcALen + srcBLen - 1; outIdx++) {
        q31_t acc = 0; /* 32-bit MAC accumulator for this output sample */

        for (tap = 0; tap <= outIdx; tap++) {
            /* Only pairs that lie inside both input arrays contribute */
            if ((outIdx - tap < srcBLen) && (tap < srcALen)) {
                acc += (q31_t)pSrcA[tap] * pSrcB[outIdx - tap];
            }
        }

        pDst[outIdx] = ref_sat_q15(acc >> 15U);
    }

    return RISCV_MATH_SUCCESS;
}
/**
 * Reference for the "fast" q15_t convolution.
 *
 * Products are accumulated in a q31_t accumulator. The accumulator is shifted
 * right by 15 bits and assigned to the q15_t output directly, i.e. without a
 * call to a saturation helper (the value is narrowed by the assignment).
 *
 * @param pSrcA    first input sequence
 * @param srcALen  number of samples in pSrcA
 * @param pSrcB    second input sequence
 * @param srcBLen  number of samples in pSrcB
 * @param pDst     output buffer; receives srcALen + srcBLen - 1 samples
 */
void ref_conv_fast_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB,
                       uint32_t srcBLen, q15_t *pDst)
{
    uint32_t outIdx, tap; /* output sample index, index into pSrcA */

    /* With both lengths zero, srcALen + srcBLen - 1 wraps to UINT32_MAX and
     * the loop below would write far beyond pDst. There is no output. */
    if (srcALen == 0U && srcBLen == 0U) {
        return;
    }

    for (outIdx = 0; outIdx < srcALen + srcBLen - 1; outIdx++) {
        q31_t acc = 0; /* 32-bit MAC accumulator for this output sample */

        for (tap = 0; tap <= outIdx; tap++) {
            /* Only pairs that lie inside both input arrays contribute */
            if ((outIdx - tap < srcBLen) && (tap < srcALen)) {
                acc += (q31_t)pSrcA[tap] * pSrcB[outIdx - tap];
            }
        }

        /* No saturation helper here: the shifted value is stored as-is */
        pDst[outIdx] = acc >> 15U;
    }
}
/**
 * Reference for the "fast opt" q15_t convolution.
 *
 * Products are accumulated in a q31_t accumulator; each result is shifted
 * right by 15 bits and passed through ref_sat_q15() (defined outside this
 * block). The scratch buffers are part of the signature but are not used.
 *
 * @param pSrcA     first input sequence
 * @param srcALen   number of samples in pSrcA
 * @param pSrcB     second input sequence
 * @param srcBLen   number of samples in pSrcB
 * @param pDst      output buffer; receives srcALen + srcBLen - 1 samples
 * @param pScratch1 unused
 * @param pScratch2 unused
 */
void ref_conv_fast_opt_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB,
                           uint32_t srcBLen, q15_t *pDst, q15_t *pScratch1,
                           q15_t *pScratch2)
{
    uint32_t outIdx, tap; /* output sample index, index into pSrcA */

    (void)pScratch1;
    (void)pScratch2;

    /* With both lengths zero, srcALen + srcBLen - 1 wraps to UINT32_MAX and
     * the loop below would write far beyond pDst. There is no output. */
    if (srcALen == 0U && srcBLen == 0U) {
        return;
    }

    for (outIdx = 0; outIdx < srcALen + srcBLen - 1; outIdx++) {
        q31_t acc = 0; /* 32-bit MAC accumulator for this output sample */

        for (tap = 0; tap <= outIdx; tap++) {
            /* Only pairs that lie inside both input arrays contribute */
            if ((outIdx - tap < srcBLen) && (tap < srcALen)) {
                acc += (q31_t)pSrcA[tap] * pSrcB[outIdx - tap];
            }
        }

        pDst[outIdx] = ref_sat_q15(acc >> 15U);
    }
}
/**
 * Reference counterpart for the partial q15_t convolution.
 *
 * Forwards to ref_conv_q15(), which fills the whole output; the
 * firstIndex / numPoints arguments are not used.
 *
 * @return always RISCV_MATH_SUCCESS
 */
riscv_status ref_conv_partial_q15(q15_t *pSrcA, uint32_t srcALen, q15_t *pSrcB,
                                  uint32_t srcBLen, q15_t *pDst,
                                  uint32_t firstIndex, uint32_t numPoints)
{
    /* Window arguments are accepted but intentionally unused */
    (void)firstIndex;
    (void)numPoints;

    ref_conv_q15(pSrcA, srcALen, pSrcB, srcBLen, pDst);

    return RISCV_MATH_SUCCESS;
}
/**
 * Reference counterpart for the partial "fast" q15_t convolution.
 *
 * Forwards to ref_conv_fast_q15(), which fills the whole output; the
 * firstIndex / numPoints arguments are not used.
 *
 * @return always RISCV_MATH_SUCCESS
 */
riscv_status ref_conv_partial_fast_q15(q15_t *pSrcA, uint32_t srcALen,
                                       q15_t *pSrcB, uint32_t srcBLen,
                                       q15_t *pDst, uint32_t firstIndex,
                                       uint32_t numPoints)
{
    /* Window arguments are accepted but intentionally unused */
    (void)firstIndex;
    (void)numPoints;

    ref_conv_fast_q15(pSrcA, srcALen, pSrcB, srcBLen, pDst);

    return RISCV_MATH_SUCCESS;
}
/**
 * Reference (direct-form, unoptimized) linear convolution of two q7_t
 * sequences.
 *
 * Each product is computed after widening pSrcA[j] to q15_t and is added to a
 * q31_t accumulator. The accumulator is shifted right by 7 bits and passed
 * through ref_sat_q7() (defined outside this block; presumably saturates to
 * the q7_t range -- confirm in the helper source) before being stored.
 *
 * @param pSrcA    first input sequence
 * @param srcALen  number of samples in pSrcA
 * @param pSrcB    second input sequence
 * @param srcBLen  number of samples in pSrcB
 * @param pDst     output buffer; receives srcALen + srcBLen - 1 samples
 */
void ref_conv_q7(q7_t *pSrcA, uint32_t srcALen, q7_t *pSrcB, uint32_t srcBLen,
                 q7_t *pDst)
{
    uint32_t outIdx, tap; /* output sample index, index into pSrcA */

    /* With both lengths zero, srcALen + srcBLen - 1 wraps to UINT32_MAX and
     * the loop below would write far beyond pDst. There is no output. */
    if (srcALen == 0U && srcBLen == 0U) {
        return;
    }

    for (outIdx = 0; outIdx < srcALen + srcBLen - 1; outIdx++) {
        q31_t acc = 0; /* 32-bit MAC accumulator for this output sample */

        for (tap = 0; tap <= outIdx; tap++) {
            /* Only pairs that lie inside both input arrays contribute */
            if ((outIdx - tap < srcBLen) && (tap < srcALen)) {
                acc += (q15_t)pSrcA[tap] * pSrcB[outIdx - tap];
            }
        }

        pDst[outIdx] = (q7_t)ref_sat_q7(acc >> 7);
    }
}
/**
 * Reference counterpart for the partial q7_t convolution.
 *
 * Forwards to ref_conv_q7(), which fills the whole output; the
 * firstIndex / numPoints arguments are not used.
 *
 * @return always RISCV_MATH_SUCCESS
 */
riscv_status ref_conv_partial_q7(q7_t *pSrcA, uint32_t srcALen, q7_t *pSrcB,
                                 uint32_t srcBLen, q7_t *pDst,
                                 uint32_t firstIndex, uint32_t numPoints)
{
    /* Window arguments are accepted but intentionally unused */
    (void)firstIndex;
    (void)numPoints;

    ref_conv_q7(pSrcA, srcALen, pSrcB, srcBLen, pDst);

    return RISCV_MATH_SUCCESS;
}

View file

@ -0,0 +1,175 @@
//
// Created by lujun on 19-6-28.
//
#include "riscv_math.h"
#include <stdint.h>
#include <stdlib.h>
#include "array.h"
#include "../common.h"
#include "../HelperFunctions/math_helper.c"
#include "../HelperFunctions/ref_helper.c"
#include <stdio.h>
// Comparison tolerances used by main(): a sample counts as a mismatch when the
// absolute difference between reference and kernel output exceeds the value.
// Tolerance for the float32 comparison.
#define DELTAF32 (0.05f)
// Tolerance for the q31 comparisons.
#define DELTAQ31 (63)
// Tolerance for the q15 comparisons.
#define DELTAQ15 (2)
// Tolerance for the q7 comparisons.
#define DELTAQ7 (2)
// Set to 1 by main() on any mismatch; main() returns 1 when it is non-zero.
int test_flag_error = 0;
// Test driver for the partial-convolution kernels.
//
// For each riscv_conv_partial_* variant the same sequence is executed:
//   1. run the kernel between BENCH_START / BENCH_END,
//   2. run the matching ref_conv_partial_* function on the same inputs,
//   3. compare both outputs sample by sample against the DELTA* tolerance,
//      reporting every mismatch and setting test_flag_error,
//   4. invoke BENCH_STATUS for that kernel.
// Returns 1 if any mismatch was recorded, 0 otherwise.
//
// NOTE(review): the BENCH_* macros, the input/output arrays, the ARRAY*_SIZE_*
// constants and pScratch1/pScratch2 are not defined in this file (presumably
// they come from ../common.h and array.h -- confirm).
// NOTE(review): the status values returned by the riscv_conv_partial_* and
// ref_conv_partial_* calls are ignored throughout.
// NOTE(review): each comparison loop runs from the firstIndex argument up to
// (but excluding) the numPoints argument; if the kernels emit numPoints
// samples starting at firstIndex, the tail of the window is not compared --
// confirm against the kernel documentation.
int main()
{
// NOTE(review): this variable is never used; every loop below declares its
// own `int i`, which shadows it.
uint16_t i;
BENCH_INIT;
// *****************************************************
// conv partial part
// *****************************************************
// --- float32 ---
BENCH_START(riscv_conv_partial_f32);
riscv_conv_partial_f32(test_conv_input_f32_A, ARRAYA_SIZE_F32, test_conv_input_f32_B, ARRAYB_SIZE_F32,
output_f32, 10, 100);
BENCH_END(riscv_conv_partial_f32);
ref_conv_partial_f32(test_conv_input_f32_A, ARRAYA_SIZE_F32, test_conv_input_f32_B, ARRAYB_SIZE_F32,
output_f32_ref, 10, 100);
for (int i = 10; i < 100; i++)
if (fabs(output_f32_ref[i] - output_f32[i]) > DELTAF32) {
BENCH_ERROR(riscv_conv_partial_f32);
printf("index:%d, expect: %f, actual: %f\n", i, output_f32_ref[i],
output_f32[i]);
test_flag_error = 1;
}
BENCH_STATUS(riscv_conv_partial_f32);
// --- q31 ---
BENCH_START(riscv_conv_partial_q31);
riscv_conv_partial_q31(test_conv_input_q31_A, ARRAYA_SIZE_Q31, test_conv_input_q31_B, ARRAYB_SIZE_Q31,
output_q31, 10, 200);
BENCH_END(riscv_conv_partial_q31);
ref_conv_partial_q31(test_conv_input_q31_A, ARRAYA_SIZE_Q31, test_conv_input_q31_B, ARRAYB_SIZE_Q31,
output_q31_ref, 10, 200);
for (int i = 10; i < 200; i++)
if (labs(output_q31_ref[i] - output_q31[i]) > DELTAQ31) {
BENCH_ERROR(riscv_conv_partial_q31);
printf("index:%d, expect: %d, actual: %d\n", i, output_q31_ref[i],
output_q31[i]);
test_flag_error = 1;
}
BENCH_STATUS(riscv_conv_partial_q31);
// --- q15 ---
BENCH_START(riscv_conv_partial_q15);
riscv_conv_partial_q15(test_conv_input_q15_A, ARRAYA_SIZE_Q15, test_conv_input_q15_B, ARRAYB_SIZE_Q15,
output_q15, 10, 100);
BENCH_END(riscv_conv_partial_q15);
ref_conv_partial_q15(test_conv_input_q15_A, ARRAYA_SIZE_Q15, test_conv_input_q15_B, ARRAYB_SIZE_Q15,
output_q15_ref, 10, 100);
for (int i = 10; i < 100; i++)
if (abs(output_q15_ref[i] - output_q15[i]) > DELTAQ15) {
BENCH_ERROR(riscv_conv_partial_q15);
printf("index:%d, expect: %d, actual: %d\n", i, output_q15_ref[i],
output_q15[i]);
test_flag_error = 1;
}
BENCH_STATUS(riscv_conv_partial_q15);
// --- q7 (window arguments 2 and 14 instead of 10 and 100) ---
BENCH_START(riscv_conv_partial_q7);
riscv_conv_partial_q7(test_conv_input_q7_A, ARRAYA_SIZE_Q7, test_conv_input_q7_B, ARRAYB_SIZE_Q7,
output_q7, 2, 14);
BENCH_END(riscv_conv_partial_q7);
ref_conv_partial_q7(test_conv_input_q7_A, ARRAYA_SIZE_Q7, test_conv_input_q7_B, ARRAYB_SIZE_Q7,
output_q7_ref, 2, 14);
for (int i = 2; i < 14; i++)
if (abs(output_q7_ref[i] - output_q7[i]) > DELTAQ7) {
BENCH_ERROR(riscv_conv_partial_q7);
printf("index:%d, expect: %d, actual: %d\n", i, output_q7_ref[i],
output_q7[i]);
test_flag_error = 1;
}
BENCH_STATUS(riscv_conv_partial_q7);
// --- fast q31 (reuses the output_q31 / output_q31_ref buffers) ---
BENCH_START(riscv_conv_partial_fast_q31);
riscv_conv_partial_fast_q31(test_conv_input_q31_A, ARRAYA_SIZE_Q31, test_conv_input_q31_B,
ARRAYB_SIZE_Q31, output_q31, 10, 200);
BENCH_END(riscv_conv_partial_fast_q31);
ref_conv_partial_fast_q31(test_conv_input_q31_A, ARRAYA_SIZE_Q31, test_conv_input_q31_B,
ARRAYB_SIZE_Q31, output_q31_ref, 10, 200);
for (int i = 10; i < 200; i++)
if (labs(output_q31_ref[i] - output_q31[i]) > DELTAQ31) {
BENCH_ERROR(riscv_conv_partial_fast_q31);
printf("index:%d, expect: %d, actual: %d\n", i, output_q31_ref[i],
output_q31[i]);
test_flag_error = 1;
}
BENCH_STATUS(riscv_conv_partial_fast_q31);
// --- fast q15 (reuses the output_q15 / output_q15_ref buffers) ---
BENCH_START(riscv_conv_partial_fast_q15);
riscv_conv_partial_fast_q15(test_conv_input_q15_A, ARRAYA_SIZE_Q15, test_conv_input_q15_B,
ARRAYB_SIZE_Q15, output_q15, 10, 100);
BENCH_END(riscv_conv_partial_fast_q15);
ref_conv_partial_fast_q15(test_conv_input_q15_A, ARRAYA_SIZE_Q15, test_conv_input_q15_B,
ARRAYB_SIZE_Q15, output_q15_ref, 10, 100);
for (int i = 10; i < 100; i++)
if (abs(output_q15_ref[i] - output_q15[i]) > DELTAQ15) {
BENCH_ERROR(riscv_conv_partial_fast_q15);
printf("index:%d, expect: %d, actual: %d\n", i, output_q15_ref[i],
output_q15[i]);
test_flag_error = 1;
}
BENCH_STATUS(riscv_conv_partial_fast_q15);
// --- opt q15 (takes the two scratch buffers) ---
BENCH_START(riscv_conv_partial_opt_q15);
riscv_conv_partial_opt_q15(test_conv_input_q15_A, ARRAYA_SIZE_Q15, test_conv_input_q15_B,
ARRAYB_SIZE_Q15, output_q15, 10, 100, pScratch1, pScratch2);
BENCH_END(riscv_conv_partial_opt_q15);
ref_conv_partial_opt_q15(test_conv_input_q15_A, ARRAYA_SIZE_Q15, test_conv_input_q15_B,
ARRAYB_SIZE_Q15, output_q15_ref, 10, 100, pScratch1,
pScratch2);
for (int i = 10; i < 100; i++)
if (abs(output_q15_ref[i] - output_q15[i]) > DELTAQ15) {
BENCH_ERROR(riscv_conv_partial_opt_q15);
printf("index:%d, expect: %d, actual: %d\n", i, output_q15_ref[i],
output_q15[i]);
test_flag_error = 1;
}
BENCH_STATUS(riscv_conv_partial_opt_q15);
// --- opt q7 (passes the same pScratch1 / pScratch2 as the q15 variants) ---
BENCH_START(riscv_conv_partial_opt_q7);
riscv_conv_partial_opt_q7(test_conv_input_q7_A, ARRAYA_SIZE_Q7, test_conv_input_q7_B, ARRAYB_SIZE_Q7,
output_q7, 2, 14, pScratch1, pScratch2);
BENCH_END(riscv_conv_partial_opt_q7);
ref_conv_partial_opt_q7(test_conv_input_q7_A, ARRAYA_SIZE_Q7, test_conv_input_q7_B, ARRAYB_SIZE_Q7,
output_q7_ref, 2, 14, pScratch1, pScratch2);
for (int i = 2; i < 14; i++)
if (abs(output_q7_ref[i] - output_q7[i]) > DELTAQ7) {
BENCH_ERROR(riscv_conv_partial_opt_q7);
printf("index:%d, expect: %d, actual: %d\n", i, output_q7_ref[i],
output_q7[i]);
test_flag_error = 1;
}
BENCH_STATUS(riscv_conv_partial_opt_q7);
// --- fast opt q15 ---
BENCH_START(riscv_conv_partial_fast_opt_q15);
riscv_conv_partial_fast_opt_q15(test_conv_input_q15_A, ARRAYA_SIZE_Q15,
test_conv_input_q15_B, ARRAYB_SIZE_Q15, output_q15, 10,
100, pScratch1, pScratch2);
BENCH_END(riscv_conv_partial_fast_opt_q15);
ref_conv_partial_fast_opt_q15(test_conv_input_q15_A, ARRAYA_SIZE_Q15,
test_conv_input_q15_B, ARRAYB_SIZE_Q15, output_q15_ref,
10, 100, pScratch1, pScratch2);
for (int i = 10; i < 100; i++)
if (abs(output_q15_ref[i] - output_q15[i]) > DELTAQ15) {
BENCH_ERROR(riscv_conv_partial_fast_opt_q15);
printf("index:%d, expect: %d, actual: %d\n", i, output_q15_ref[i],
output_q15[i]);
test_flag_error = 1;
}
BENCH_STATUS(riscv_conv_partial_fast_opt_q15);
BENCH_FINISH;
// Overall verdict: a non-zero exit status signals at least one mismatch.
if (test_flag_error) {
printf("test error apprears, please recheck.\n");
return 1;
} else {
printf("all test are passed. Well done!\n");
}
return 0;
}