[feat][nmsis] add nmsis component and nn,dsp demo

2025-07-22 20:59:03 +00:00 · 2021-09-26 13:38:51 +08:00 · 2021-09-26 13:38:51 +08:00 · 5d1126d0f0
commit 5d1126d0f0
parent b2aada479b
989 changed files with 286224 additions and 0 deletions
--- a/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_mat_q7_vec_q15_opt_ref.c
+++ b/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_mat_q7_vec_q15_opt_ref.c
@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS NN Library
+ * Title:        riscv_fully_connected_mat_q7_vec_q15_opt.c
+ * Description:  Mixed Q15-Q7 opt fully-connected layer function
+ *
+ * $Date:        17. January 2018
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor: RISC-V Cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "ref_functions.h"
+#include "riscv_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup FC
+ * @{
+ */
+
+  /**
+   * @brief Mixed Q15-Q7 opt fully-connected layer function
+   * @param[in]       pV          pointer to input vector
+   * @param[in]       pM          pointer to matrix weights
+   * @param[in]       dim_vec     length of the vector
+   * @param[in]       num_of_rows number of rows in weight matrix
+   * @param[in]       bias_shift  amount of left-shift for bias
+   * @param[in]       out_shift   amount of right-shift for output
+   * @param[in]       bias        pointer to bias
+   * @param[in,out]   pOut        pointer to output vector
+   * @param[in,out]   vec_buffer  pointer to buffer space for input
+   * @return     The function returns <code>RISCV_MATH_SUCCESS</code>
+   *
+   * @details
+   *
+   * <b>Buffer size:</b>
+   *
+   * vec_buffer size: 0
+   *
+   *  Q7_Q15 version of the fully connected layer
+   *
+   *  Weights are in q7_t and Activations are in q15_t
+   *
+   *  Limitation: x4 version requires weight reordering to work
+   *
+   *  Here we use only one pointer to read 4 rows in the weight
+   *  matrix. So if the original q7_t matrix looks like this:
+   *
+   *  | a11 | a12 | a13 | a14 | a15 | a16 | a17 |
+   *
+   *  | a21 | a22 | a23 | a24 | a25 | a26 | a27 |
+   *
+   *  | a31 | a32 | a33 | a34 | a35 | a36 | a37 |
+   *
+   *  | a41 | a42 | a43 | a44 | a45 | a46 | a47 |
+   *
+   *  | a51 | a52 | a53 | a54 | a55 | a56 | a57 |
+   *
+   *  | a61 | a62 | a63 | a64 | a65 | a66 | a67 |
+   *
+   *  We operate on multiple-of-4 rows, so the first four rows become
+   *
+   *  | a11 | a21 | a12 | a22 | a31 | a41 | a32 | a42 |
+   *
+   *  | a13 | a23 | a14 | a24 | a33 | a43 | a34 | a44 |
+   *
+   *  | a15 | a25 | a16 | a26 | a35 | a45 | a36 | a46 |
+   *
+   *  The column left over will be in-order.
+   *  which is:
+   *  | a17 | a27 | a37 | a47 |
+   *
+   *  For the left-over rows, we do 1x1 computation, so the data remains
+   *  in its original order.
+   *
+   *  So the stored weight matrix looks like this:
+   *
+   *  | a11 | a21 | a12 | a22 | a31 | a41 |
+   *
+   *  | a32 | a42 | a13 | a23 | a14 | a24 |
+   *
+   *  | a33 | a43 | a34 | a44 | a15 | a25 |
+   *
+   *  | a16 | a26 | a35 | a45 | a36 | a46 |
+   *
+   *  | a17 | a27 | a37 | a47 | a51 | a52 |
+   *
+   *  | a53 | a54 | a55 | a56 | a57 | a61 |
+   *
+   *  | a62 | a63 | a64 | a65 | a66 | a67 |
+   *
+   */
+
+//      REMOVED
+
+// riscv_status
+// riscv_fully_connected_mat_q7_vec_q15_opt_ref(const q15_t * pV,
+//                                            const q7_t * pM,
+//                                            const uint16_t dim_vec,
+//                                            const uint16_t num_of_rows,
+//                                            const uint16_t bias_shift,
+//                                            const uint16_t out_shift, const q7_t * bias, q15_t * pOut, q15_t * vec_buffer)
+// {
+
+//     (void)vec_buffer;
+//     /* Run the following code as reference implementation for RISC-V Core without DSP */
+//     uint16_t  rowCnt = num_of_rows >> 2;
+//     const q7_t *pB = pM;
+//     const q15_t *pA;
+//     q15_t    *pO = pOut;
+//     const q7_t *pBias = bias;
+
+//     while (rowCnt)
+//     {
+//         q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         uint16_t  colCnt = dim_vec >> 1;
+
+//         pA = pV;
+
+//         while (colCnt)
+//         {
+//             q15_t     inA1 = *pA++;
+//             q15_t     inA2 = *pA++;
+
+//             q7_t      inB1 = *pB++;
+//             q7_t      inB3 = *pB++;
+//             q7_t      inB2 = *pB++;
+//             q7_t      inB4 = *pB++;
+
+//             sum += inA1 * inB1 + inA2 * inB2;
+//             sum2 += inA1 * inB3 + inA2 * inB4;
+
+//             inB1 = *pB++;
+//             inB3 = *pB++;
+//             inB2 = *pB++;
+//             inB4 = *pB++;
+
+//             sum3 += inA1 * inB1 + inA2 * inB2;
+//             sum4 += inA1 * inB3 + inA2 * inB4;
+
+//             colCnt--;
+//         }
+
+//         colCnt = dim_vec & 0x1;
+//         while (colCnt)
+//         {
+//             q15_t     inA = *pA++;
+//             q7_t      inB = *pB++;
+//             sum += inA * inB;
+//             inB = *pB++;
+//             sum2 += inA * inB;
+//             inB = *pB++;
+//             sum3 += inA * inB;
+//             inB = *pB++;
+//             sum4 += inA * inB;
+
+//             colCnt--;
+//         }
+//         *pO++ = (q15_t) __SSAT((sum >> out_shift), 16);
+//         *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16);
+//         *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16);
+//         *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16);
+
+//         rowCnt--;
+//     }
+
+//     rowCnt = num_of_rows & 0x3;
+
+//     while (rowCnt)
+//     {
+//         int       ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         int       j;
+
+//         pA = pV;
+//         for (j = 0; j < dim_vec; j++)
+//         {
+//             q15_t     inA = *pA++;
+//             q7_t      inB = *pB++;
+//             ip_out += inA * inB;
+//         }
+//         *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16);
+
+//         rowCnt--;
+//     }
+//     /* Return to RISCV_MATH_SUCCESS */
+//     return (RISCV_MATH_SUCCESS);
+
+// }
+
+/**
+ * @} end of FC group
+ */
--- a/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_mat_q7_vec_q15_ref.c
+++ b/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_mat_q7_vec_q15_ref.c
@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS NN Library
+ * Title:        riscv_fully_connected_mat_q7_vec_q15.c
+ * Description:  Mixed Q15-Q7 fully-connected layer function
+ *
+ * $Date:        17. January 2018
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor: RISC-V Cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "ref_functions.h"
+#include "riscv_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup FC
+ * @{
+ */
+
+  /**
+   * @brief Mixed Q15-Q7 fully-connected layer function
+   * @param[in]       pV          pointer to input vector
+   * @param[in]       pM          pointer to matrix weights
+   * @param[in]       dim_vec     length of the vector
+   * @param[in]       num_of_rows number of rows in weight matrix
+   * @param[in]       bias_shift  amount of left-shift for bias
+   * @param[in]       out_shift   amount of right-shift for output
+   * @param[in]       bias        pointer to bias
+   * @param[in,out]   pOut        pointer to output vector
+   * @param[in,out]   vec_buffer  pointer to buffer space for input
+   * @return     The function returns <code>RISCV_MATH_SUCCESS</code>
+   *
+   * @details
+   *
+   * <b>Buffer size:</b>
+   *
+   * vec_buffer size: 0
+   *
+   *  Q7_Q15 version of the fully connected layer
+   *
+   *  Weights are in q7_t and Activations are in q15_t
+   *
+   */
+
+  //        REMOVED
+
+// riscv_status
+// riscv_fully_connected_mat_q7_vec_q15_ref(const q15_t * pV,
+//                                        const q7_t * pM,
+//                                        const uint16_t dim_vec,
+//                                        const uint16_t num_of_rows,
+//                                        const uint16_t bias_shift,
+//                                        const uint16_t out_shift,
+//                                        const q7_t * bias,
+//                                        q15_t * pOut,
+//                                        q15_t * vec_buffer)
+// {
+//     (void)vec_buffer;
+//     int       i, j;
+//     /* Run the following code as reference implementation for RISC-V Core without DSP */
+//     for (i = 0; i < num_of_rows; i++)
+//     {
+//         int       ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
+//         for (j = 0; j < dim_vec; j++)
+//         {
+//             ip_out += pV[j] * pM[i * dim_vec + j];
+//         }
+//         pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16);
+//     }
+
+//     /* Return to RISCV_MATH_SUCCESS */
+//     return (RISCV_MATH_SUCCESS);
+
+// }
+
+/**
+ * @} end of FC group
+ */
--- a/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_q15_opt_ref.c
+++ b/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_q15_opt_ref.c
@ -0,0 +1,197 @@
+/*
+ * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS NN Library
+ * Title:        riscv_fully_connected_q15_opt.c
+ * Description:  Q15 opt fully-connected layer function
+ *
+ * $Date:        17. January 2018
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor: RISC-V Cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "ref_functions.h"
+#include "riscv_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup FC
+ * @{
+ */
+
+  /**
+   * @brief Q15 opt fully-connected layer function
+   * @param[in]       pV          pointer to input vector
+   * @param[in]       pM          pointer to matrix weights
+   * @param[in]       dim_vec     length of the vector
+   * @param[in]       num_of_rows number of rows in weight matrix
+   * @param[in]       bias_shift  amount of left-shift for bias
+   * @param[in]       out_shift   amount of right-shift for output
+   * @param[in]       bias        pointer to bias
+   * @param[in,out]   pOut        pointer to output vector
+   * @param[in,out]   vec_buffer  pointer to buffer space for input
+   * @return     The function returns <code>RISCV_MATH_SUCCESS</code>
+   *
+   *
+   * @details
+   *
+   * <b>Buffer size:</b>
+   *
+   * vec_buffer size: 0
+   *
+   *  Here we use only one pointer to read 4 rows in the weight
+   *  matrix. So if the original matrix looks like this:
+   *
+   *  | a11 | a12 | a13 |
+   *
+   *  | a21 | a22 | a23 |
+   *
+   *  | a31 | a32 | a33 |
+   *
+   *  | a41 | a42 | a43 |
+   *
+   *  | a51 | a52 | a53 |
+   *
+   *  | a61 | a62 | a63 |
+   *
+   *  We operate on multiple-of-4 rows, so the first four rows become
+   *
+   *  | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |
+   *
+   *  | a13 | a23 | a33 | a43 |
+   *
+   *  Remaining rows are kept in their original order.
+   *
+   *  So the stored weight matrix looks like this:
+   *
+   *
+   *  | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 |
+   *
+   *  | a13 | a23 | a33 | a43 | a51 | a52 | a53 | a61 |
+   *
+   *  | a62 | a63 |
+   */
+
+  //        REMOVED
+
+// riscv_status
+// riscv_fully_connected_q15_opt_ref(const q15_t * pV,
+//                                 const q15_t * pM,
+//                                 const uint16_t dim_vec,
+//                                 const uint16_t num_of_rows,
+//                                 const uint16_t bias_shift,
+//                                 const uint16_t out_shift,
+//                                 const q15_t * bias,
+//                                 q15_t * pOut,
+//                                 q15_t * vec_buffer)
+// {
+//     (void)vec_buffer;
+//     /* Run the following code as reference implementation for RISC-V Core without DSP */
+//     uint16_t  rowCnt = num_of_rows >> 2;
+//     const q15_t *pB = pM;
+//     const q15_t *pA;
+//     q15_t    *pO = pOut;
+//     const q15_t *pBias = bias;
+
+//     while (rowCnt)
+//     {
+//         q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+
+//         uint16_t  colCnt = dim_vec >> 1;
+
+//         pA = pV;
+//         while (colCnt)
+//         {
+//             q15_t     inA1 = *pA++;
+//             q15_t     inA2 = *pA++;
+
+//             q15_t     inB1 = *pB++;
+//             q15_t     inB2 = *pB++;
+//             sum += inA1 * inB1 + inA2 * inB2;
+
+//             inB1 = *pB++;
+//             inB2 = *pB++;
+//             sum2 += inA1 * inB1 + inA2 * inB2;
+
+//             inB1 = *pB++;
+//             inB2 = *pB++;
+//             sum3 += inA1 * inB1 + inA2 * inB2;
+
+//             inB1 = *pB++;
+//             inB2 = *pB++;
+//             sum4 += inA1 * inB1 + inA2 * inB2;
+
+//             colCnt--;
+//         }
+//         colCnt = dim_vec & 0x1;
+//         while (colCnt)
+//         {
+//             q15_t     inA = *pA++;
+//             q15_t     inB = *pB++;
+//             sum += inA * inB;
+//             inB = *pB++;
+//             sum2 += inA * inB;
+//             inB = *pB++;
+//             sum3 += inA * inB;
+//             inB = *pB++;
+//             sum4 += inA * inB;
+//             colCnt--;
+//         }
+//         *pO++ = (q15_t) __SSAT((sum >> out_shift), 16);
+//         *pO++ = (q15_t) __SSAT((sum2 >> out_shift), 16);
+//         *pO++ = (q15_t) __SSAT((sum3 >> out_shift), 16);
+//         *pO++ = (q15_t) __SSAT((sum4 >> out_shift), 16);
+
+//         rowCnt--;
+//     }
+//     rowCnt = num_of_rows & 0x3;
+
+//     while (rowCnt)
+//     {
+//         int       ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         int       j;
+
+//         pA = pV;
+//         for (j = 0; j < dim_vec; j++)
+//         {
+//             q15_t     inA = *pA++;
+//             q15_t     inB = *pB++;
+//             ip_out += inA * inB;
+//         }
+//         *pO++ = (q15_t) __SSAT((ip_out >> out_shift), 16);
+
+//         rowCnt--;
+//     }
+//     /* Return to RISCV_MATH_SUCCESS */
+//     return (RISCV_MATH_SUCCESS);
+
+// }
+
+/**
+ * @} end of FC group
+ */
--- a/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_q15_ref.c
+++ b/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_q15_ref.c
@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS NN Library
+ * Title:        riscv_fully_connected_q15.c
+ * Description:  Q15 basic fully-connected layer function
+ *
+ * $Date:        17. January 2018
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor: RISC-V Cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "ref_functions.h"
+#include "riscv_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup FC
+ * @{
+ */
+
+  /**
+   * @brief Q15 basic fully-connected layer function
+   * @param[in]       pV          pointer to input vector
+   * @param[in]       pM          pointer to matrix weights
+   * @param[in]       dim_vec     length of the vector
+   * @param[in]       num_of_rows number of rows in weight matrix
+   * @param[in]       bias_shift  amount of left-shift for bias
+   * @param[in]       out_shift   amount of right-shift for output
+   * @param[in]       bias        pointer to bias
+   * @param[in,out]   pOut        pointer to output vector
+   * @param[in,out]   vec_buffer  pointer to buffer space for input
+   * @return     The function returns <code>RISCV_MATH_SUCCESS</code>
+   *
+   *
+   * @details
+   *
+   * <b>Buffer size:</b>
+   *
+   * vec_buffer size: 0
+   *
+   */
+
+  //        REMOVED
+
+// riscv_status
+// riscv_fully_connected_q15_ref(const q15_t * pV,
+//                             const q15_t * pM,
+//                             const uint16_t dim_vec,
+//                             const uint16_t num_of_rows,
+//                             const uint16_t bias_shift,
+//                             const uint16_t out_shift,
+//                             const q15_t * bias,
+//                             q15_t * pOut,
+//                             q15_t * vec_buffer)
+// {
+//     (void)vec_buffer;
+//     int       i, j;
+//     /* Run the following code as reference implementation for RISC-V Core without DSP */
+//     for (i = 0; i < num_of_rows; i++)
+//     {
+//         int       ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
+//         for (j = 0; j < dim_vec; j++)
+//         {
+//             ip_out += pV[j] * pM[i * dim_vec + j];
+//         }
+//         pOut[i] = (q15_t) __SSAT((ip_out >> out_shift), 16);
+//     }
+//     /* Return to application */
+//     return (RISCV_MATH_SUCCESS);
+
+// }
+
+/**
+ * @} end of FC group
+ */
--- a/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_q7_opt_ref.c
+++ b/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_q7_opt_ref.c
@ -0,0 +1,251 @@
+/*
+ * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS NN Library
+ * Title:        riscv_fully_connected_q7_opt.c
+ * Description:  Q7 opt fully-connected layer function
+ *
+ * $Date:        17. January 2018
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor: RISC-V Cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "ref_functions.h"
+#include "riscv_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup FC
+ * @{
+ */
+
+  /**
+   * @brief Q7 opt fully-connected layer function
+   * @param[in]       pV          pointer to input vector
+   * @param[in]       pM          pointer to matrix weights
+   * @param[in]       dim_vec     length of the vector
+   * @param[in]       num_of_rows number of rows in weight matrix
+   * @param[in]       bias_shift  amount of left-shift for bias
+   * @param[in]       out_shift   amount of right-shift for output
+   * @param[in]       bias        pointer to bias
+   * @param[in,out]   pOut        pointer to output vector
+   * @param[in,out]   vec_buffer  pointer to buffer space for input
+   * @return     The function returns <code>RISCV_MATH_SUCCESS</code>
+   *
+   * @details
+   *
+   * <b>Buffer size:</b>
+   *
+   * vec_buffer size: dim_vec
+   *
+   * This opt function is designed to work with interleaved weight
+   * matrix. The vector input is assumed in q7_t format, we call
+   *  riscv_q7_to_q15_no_shift_shuffle function to expand into
+   *  q15_t format with certain weight re-ordering, refer to the function
+   *  comments for more details.
+   *  Here we use only one pointer to read 4 rows in the weight
+   *  matrix. So if the original q7_t matrix looks like this:
+   *
+   *  | a11 | a12 | a13 | a14 | a15 | a16 | a17 |
+   *
+   *  | a21 | a22 | a23 | a24 | a25 | a26 | a27 |
+   *
+   *  | a31 | a32 | a33 | a34 | a35 | a36 | a37 |
+   *
+   *  | a41 | a42 | a43 | a44 | a45 | a46 | a47 |
+   *
+   *  | a51 | a52 | a53 | a54 | a55 | a56 | a57 |
+   *
+   *  | a61 | a62 | a63 | a64 | a65 | a66 | a67 |
+   *
+   *
+   *  We operate on multiple-of-4 rows, so the first four rows become
+   *
+   *  | a11 | a21 | a13 | a23 | a31 | a41 | a33 | a43 |
+   *
+   *  | a12 | a22 | a14 | a24 | a32 | a42 | a34 | a44 |
+   *
+   *  | a15 | a25 | a35 | a45 | a16 | a26 | a36 | a46 |
+   *
+   *  So within the kernel, we first read the re-ordered vector in as:
+   *
+   *  | b1  | b3  | and | b2  | b4  |
+   *
+   *  the four q31_t weights will look like
+   *
+   *  | a11 | a13 |, | a21 | a23 |, | a31 | a33 |, | a41 | a43 |
+   *
+   *  | a12 | a14 |, | a22 | a24 |, | a32 | a34 |, | a42 | a44 |
+   *
+   *  The column left over will be in-order.
+   *  which is:
+   *
+   *  | a17 | a27 | a37 | a47 |
+   *
+   *  For the left-over rows, we do 1x1 computation, so the data remains
+   *  in its original order.
+   *
+   *  So the stored weight matrix looks like this:
+   *
+   *  | a11 | a21 | a13 | a23 | a31 | a41 |
+   *
+   *  | a33 | a43 | a12 | a22 | a14 | a24 |
+   *
+   *  | a32 | a42 | a34 | a44 | a15 | a25 |
+   *
+   *  | a35 | a45 | a16 | a26 | a36 | a46 |
+   *
+   *  | a17 | a27 | a37 | a47 | a51 | a52 |
+   *
+   *  | a53 | a54 | a55 | a56 | a57 | a61 |
+   *
+   *  | a62 | a63 | a64 | a65 | a66 | a67 |
+   *
+   *
+   */
+
+//      REMOVED
+
+// riscv_status
+// riscv_fully_connected_q7_opt_ref(const q7_t * pV,
+//                                const q7_t * pM,
+//                                const uint16_t dim_vec,
+//                                const uint16_t num_of_rows,
+//                                const uint16_t bias_shift,
+//                                const uint16_t out_shift,
+//                                const q7_t * bias,
+//                                q7_t * pOut,
+//                                q15_t * vec_buffer)
+// {
+
+//     /* Run the following code as reference implementation for RISC-V Core without DSP */
+//     uint16_t  rowCnt = num_of_rows >> 2;
+//     const q7_t *pB = pM;
+//     const q7_t *pA;
+//     q7_t     *pO = pOut;
+//     const q7_t *pBias = bias;
+
+//     while (rowCnt)
+//     {
+//         q31_t     sum =  ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         q31_t     sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         q31_t     sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+//         q31_t     sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+
+//         uint16_t  colCnt = dim_vec >> 2;
+
+//         pA = pV;
+
+//         while (colCnt)
+//         {
+//             q7_t      inA1 = *pA++;
+//             q7_t      inA3 = *pA++;
+//             q7_t      inA2 = *pA++;
+//             q7_t      inA4 = *pA++;
+
+//             q7_t      inB1 = *pB++;
+//             q7_t      inB3 = *pB++;
+//             q7_t      inB2 = *pB++;
+//             q7_t      inB4 = *pB++;
+
+//             sum += inA1 * inB1 + inA2 * inB2;
+//             sum2 += inA1 * inB3 + inA2 * inB4;
+
+//             inB1 = *pB++;
+//             inB3 = *pB++;
+//             inB2 = *pB++;
+//             inB4 = *pB++;
+
+//             sum3 += inA1 * inB1 + inA2 * inB2;
+//             sum4 += inA1 * inB3 + inA2 * inB4;
+
+//             inB1 = *pB++;
+//             inB3 = *pB++;
+//             inB2 = *pB++;
+//             inB4 = *pB++;
+
+//             sum += inA3 * inB1 + inA4 * inB2;
+//             sum2 += inA3 * inB3 + inA4 * inB4;
+
+//             inB1 = *pB++;
+//             inB3 = *pB++;
+//             inB2 = *pB++;
+//             inB4 = *pB++;
+
+//             sum3 += inA3 * inB1 + inA4 * inB2;
+//             sum4 += inA3 * inB3 + inA4 * inB4;
+
+//             colCnt--;
+//         }
+//         colCnt = dim_vec & 0x3;
+//         while (colCnt)
+//         {
+//             q7_t      inA = *pA++;
+//             q7_t      inB = *pB++;
+//             sum += inA * inB;
+//             inB = *pB++;
+//             sum2 += inA * inB;
+//             inB = *pB++;
+//             sum3 += inA * inB;
+//             inB = *pB++;
+//             sum4 += inA * inB;
+
+//             colCnt--;
+//         }
+//         *pO++ = (q7_t) __SSAT((sum >> out_shift), 8);
+//         *pO++ = (q7_t) __SSAT((sum2 >> out_shift), 8);
+//         *pO++ = (q7_t) __SSAT((sum3 >> out_shift), 8);
+//         *pO++ = (q7_t) __SSAT((sum4 >> out_shift), 8);
+
+//         rowCnt--;
+//     }
+
+//     rowCnt = num_of_rows & 0x3;
+
+//     while (rowCnt)
+//     {
+//         int       ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
+
+//         int       j;
+
+//         pA = pV;
+//         for (j = 0; j < dim_vec; j++)
+//         {
+//             q7_t      inA = *pA++;
+//             q7_t      inB = *pB++;
+//             ip_out += inA * inB;
+//         }
+//         *pO++ = (q7_t) __SSAT((ip_out >> out_shift), 8);
+
+//         rowCnt--;
+//     }
+//     /* Return to RISCV_MATH_SUCCESS */
+//     return (RISCV_MATH_SUCCESS);
+
+// }
+
+/**
+ * @} end of FC group
+ */
--- a/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_q7_ref.c
+++ b/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_q7_ref.c
@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS NN Library
+ * Title:        riscv_fully_connected_q7.c
+ * Description:  Q7 basic fully-connected layer function
+ *
+ * $Date:        17. January 2018
+ * $Revision:    V.1.0.0
+ *
+ * Target Processor: RISC-V Cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "ref_functions.h"
+#include "riscv_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup FC
+ * @{
+ */
+
+  /**
+   * @brief Q7 basic fully-connected layer function
+   * @param[in]       pV          pointer to input vector
+   * @param[in]       pM          pointer to matrix weights
+   * @param[in]       dim_vec     length of the vector
+   * @param[in]       num_of_rows number of rows in weight matrix
+   * @param[in]       bias_shift  amount of left-shift for bias
+   * @param[in]       out_shift   amount of right-shift for output
+   * @param[in]       bias        pointer to bias
+   * @param[in,out]   pOut        pointer to output vector
+   * @param[in,out]   vec_buffer  pointer to buffer space for input
+   * @return     The function returns <code>RISCV_MATH_SUCCESS</code>
+   *
+   * @details
+   *
+   * <b>Buffer size:</b>
+   *
+   * vec_buffer size: dim_vec
+   *
+   * This basic function is designed to work with regular weight
+   * matrix without interleaving.
+   *
+   */
+
+
+//
+//      REMOVED
+//
+
+// riscv_status
+// riscv_fully_connected_q7_ref(const q7_t * pV,
+//                            const q7_t * pM,
+//                            const uint16_t dim_vec,
+//                            const uint16_t num_of_rows,
+//                            const uint16_t bias_shift,
+//                            const uint16_t out_shift, const q7_t * bias, q7_t * pOut, q15_t * vec_buffer)
+// {
+
+//     int       i, j;
+
+//     /* Run the following code as reference implementation for RISC-V Core without DSP */
+//     for (i = 0; i < num_of_rows; i++)
+//     {
+//         int       ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
+//         for (j = 0; j < dim_vec; j++)
+//         {
+//             ip_out += pV[j] * pM[i * dim_vec + j];
+//         }
+//         pOut[i] = (q7_t) __SSAT((ip_out >> out_shift), 8);
+//     }
+//     /* Return to RISCV_MATH_SUCCESS */
+//     return (RISCV_MATH_SUCCESS);
+
+// }
+
+/**
+ * @} end of FC group
+ */
--- a/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_s8_ref.c
+++ b/examples/nn/nnTest/Ref_Implementations/FullyConnectedFunctions/riscv_fully_connected_s8_ref.c
@ -0,0 +1,98 @@
+/*
+ * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
+ * Copyright (c) 2019 Nuclei Limited. All rights reserved.
+ *
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * Licensed under the Apache License, Version 2.0 (the License); you may
+ * not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an AS IS BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* ----------------------------------------------------------------------
+ * Project:      NMSIS NN Library
+ * Title:        riscv_fully_connected_s8
+ * Description:  Fully connected function compatible with TF Lite.
+ *
+ * $Date:        May 2, 2020
+ * $Revision:    V.2.0.0
+ *
+ * Target Processor: RISC-V Cores
+ *
+ * -------------------------------------------------------------------- */
+
+#include "ref_functions.h"
+#include "riscv_nnsupportfunctions.h"
+
+/**
+ *  @ingroup groupNN
+ */
+
+/**
+ * @addtogroup FC
+ * @{
+ */
+
+/*
+ * S8 basic fully-connected and matrix multiplication layer function for
+ * TensorFlow Lite (reference implementation).
+ *
+ * Calls riscv_nn_vec_mat_mult_t_s8_ref() once per batch, input_dims->n
+ * times in total. Every call uses filter_dims->n as the column dimension
+ * (accumulation depth) and output_dims->c as the row dimension (output
+ * depth). After each call the input pointer is advanced by filter_dims->n
+ * elements and the output pointer by output_dims->c elements.
+ *
+ * ctx and bias_dims are not used by this implementation.
+ *
+ * Always returns RISCV_MATH_SUCCESS. Refer header file for details.
+ */
+
+riscv_status
+riscv_fully_connected_s8_ref(const nmsis_nn_context *ctx,
+                           const nmsis_nn_fc_params *fc_params,
+                           const nmsis_nn_per_tensor_quant_params *quant_params,
+                           const nmsis_nn_dims *input_dims,
+                           const q7_t *input,
+                           const nmsis_nn_dims *filter_dims,
+                           const q7_t *kernel,
+                           const nmsis_nn_dims *bias_dims,
+                           const int32_t *bias,
+                           const nmsis_nn_dims *output_dims,
+                           q7_t *output)
+{
+    int32_t batches_left;
+
+    /* Parameters that this implementation does not read. */
+    (void)ctx;
+    (void)bias_dims;
+
+    for (batches_left = input_dims->n; batches_left != 0; batches_left--)
+    {
+        /* One vector-by-matrix product for the current batch. */
+        riscv_nn_vec_mat_mult_t_s8_ref(input, kernel, bias, output,
+                                     fc_params->input_offset,
+                                     fc_params->filter_offset,
+                                     fc_params->output_offset,
+                                     quant_params->multiplier,
+                                     quant_params->shift,
+                                     filter_dims->n, /* col_dim or accum_depth */
+                                     output_dims->c, /* row_dim or output_depth */
+                                     fc_params->activation.min,
+                                     fc_params->activation.max);
+
+        /* Move on to the next batch's input and output vectors. */
+        input += filter_dims->n;
+        output += output_dims->c;
+    }
+
+    return RISCV_MATH_SUCCESS;
+}
+
+/*
+ * Buffer-size query, presumably paired with riscv_fully_connected_s8_ref
+ * (confirm against the header). The filter_dims argument is ignored and
+ * the function always returns 0.
+ */
+int32_t riscv_fully_connected_s8_get_buffer_size_ref(const nmsis_nn_dims *filter_dims)
+{
+    const int32_t buffer_size = 0;
+
+    (void)filter_dims;
+    return buffer_size;
+}
+
+/**
+ * @} end of FC group
+ */