From d7dec9df35ac3d3b94bb544633bd3eeeb8655ddd Mon Sep 17 00:00:00 2001 From: Razvalyaev Date: Sat, 15 Nov 2025 08:22:07 +0300 Subject: [PATCH] =?UTF-8?q?=D0=A3=D0=B1=D1=80=D0=B0=D0=BD=D0=BE=20=D0=BB?= =?UTF-8?q?=D0=B8=D1=88=D0=BD=D0=B5=D0=B5=20=D0=B8=D0=B7=20CMSIS=20=D0=B1?= =?UTF-8?q?=D0=B8=D0=B1=D0=BB=D0=B8=D0=BE=D1=82=D0=B5=D0=BA=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Добавление подключение DSP в конфиг периферии --- .../CMSIS/Core_A/Include/cmsis_armcc.h | 563 ---- .../CMSIS/Core_A/Include/cmsis_armclang.h | 614 ---- .../CMSIS/Core_A/Include/cmsis_compiler.h | 213 -- .../Drivers/CMSIS/Core_A/Include/cmsis_cp15.h | 514 ---- .../Drivers/CMSIS/Core_A/Include/cmsis_gcc.h | 917 ------ .../CMSIS/Core_A/Include/cmsis_iccarm.h | 573 ---- .../Drivers/CMSIS/Core_A/Include/core_ca.h | 2614 ----------------- .../Drivers/CMSIS/Core_A/Include/irq_ctrl.h | 192 -- .../CMSIS/Core_A/Source/irq_ctrl_gic.c | 418 --- .../CMSIS/DAP/Firmware/Config/DAP_config.h | 561 ---- .../Drivers/CMSIS/DAP/Firmware/Include/DAP.h | 367 --- .../Drivers/CMSIS/DAP/Firmware/Source/DAP.c | 1812 ------------ .../CMSIS/DAP/Firmware/Source/DAP_vendor.c | 100 - .../CMSIS/DAP/Firmware/Source/JTAG_DP.c | 370 --- .../Drivers/CMSIS/DAP/Firmware/Source/SWO.c | 798 ----- .../Drivers/CMSIS/DAP/Firmware/Source/SW_DP.c | 286 -- .../Drivers/CMSIS/DAP/Firmware/Source/UART.c | 652 ---- .../BasicMathFunctions.c | 0 .../BasicMathFunctionsF16.c | 0 .../{ => BayesFunctions}/BayesFunctions.c | 0 .../{ => BayesFunctions}/BayesFunctionsF16.c | 0 .../Source/{ => CommonTables}/CommonTables.c | 0 .../{ => CommonTables}/CommonTablesF16.c | 0 .../ComplexMathFunctions.c | 0 .../ComplexMathFunctionsF16.c | 0 .../ControllerFunctions.c | 0 .../DistanceFunctions.c | 0 .../DistanceFunctionsF16.c | 0 .../FastMathFunctions.c | 0 .../FastMathFunctionsF16.c | 0 .../FilteringFunctions.c | 0 .../FilteringFunctionsF16.c | 0 .../InterpolationFunctions.c | 0 .../InterpolationFunctionsF16.c | 0 .../{ => MatrixFunctions}/MatrixFunctions.c | 0 .../MatrixFunctionsF16.c | 0 .../QuaternionMathFunctions.c | 0 .../Source/{ => SVMFunctions}/SVMFunctions.c | 0 .../{ => SVMFunctions}/SVMFunctionsF16.c | 0 .../StatisticsFunctions.c | 0 .../StatisticsFunctionsF16.c | 0 .../{ => SupportFunctions}/SupportFunctions.c | 0 .../SupportFunctionsF16.c | 0 .../TransformFunctions.c | 0 .../TransformFunctionsF16.c | 0 .../Drivers/CMSIS/NN/CMakeLists.txt | 29 - .../CMSIS/NN/Include/arm_nn_math_types.h | 169 -- .../Drivers/CMSIS/NN/Include/arm_nn_tables.h | 56 - .../Drivers/CMSIS/NN/Include/arm_nn_types.h | 137 - .../CMSIS/NN/Include/arm_nnfunctions.h | 2532 ---------------- .../CMSIS/NN/Include/arm_nnsupportfunctions.h | 1186 -------- .../Source/ActivationFunctions/CMakeLists.txt | 30 - .../arm_nn_activations_q15.c | 96 - .../arm_nn_activations_q7.c | 89 - .../Source/ActivationFunctions/arm_relu6_s8.c | 65 - .../Source/ActivationFunctions/arm_relu_q15.c | 104 - .../Source/ActivationFunctions/arm_relu_q7.c | 109 - .../Source/BasicMathFunctions/CMakeLists.txt | 31 - .../arm_elementwise_add_s16.c | 105 - .../arm_elementwise_add_s8.c | 255 -- .../arm_elementwise_mul_s16.c | 95 - .../arm_elementwise_mul_s8.c | 200 -- .../Drivers/CMSIS/NN/Source/CMakeLists.txt | 98 - .../ConcatenationFunctions/CMakeLists.txt | 20 - .../arm_concatenation_s8_w.c | 66 - .../arm_concatenation_s8_x.c | 75 - .../arm_concatenation_s8_y.c | 76 - .../arm_concatenation_s8_z.c | 75 - .../ConvolutionFunctions/CMakeLists.txt | 24 - .../arm_convolve_1_x_n_s8.c | 205 -- .../arm_convolve_1x1_HWC_q7_fast_nonsquare.c | 235 -- .../arm_convolve_1x1_s8_fast.c | 161 - .../arm_convolve_HWC_q15_basic.c | 209 -- .../arm_convolve_HWC_q15_fast.c | 259 -- .../arm_convolve_HWC_q15_fast_nonsquare.c | 270 -- .../arm_convolve_HWC_q7_RGB.c | 280 -- .../arm_convolve_HWC_q7_basic.c | 227 -- .../arm_convolve_HWC_q7_basic_nonsquare.c | 229 -- .../arm_convolve_HWC_q7_fast.c | 380 --- .../arm_convolve_HWC_q7_fast_nonsquare.c | 378 --- .../arm_convolve_fast_s16.c | 241 -- .../ConvolutionFunctions/arm_convolve_s16.c | 156 - .../ConvolutionFunctions/arm_convolve_s8.c | 335 --- .../arm_convolve_wrapper_s16.c | 130 - .../arm_convolve_wrapper_s8.c | 133 - .../arm_depthwise_conv_3x3_s8.c | 212 -- .../arm_depthwise_conv_s16.c | 292 -- .../arm_depthwise_conv_s8.c | 347 --- .../arm_depthwise_conv_s8_opt.c | 433 --- .../arm_depthwise_conv_u8_basic_ver1.c | 336 --- .../arm_depthwise_conv_wrapper_s8.c | 135 - .../arm_depthwise_separable_conv_HWC_q7.c | 422 --- ...epthwise_separable_conv_HWC_q7_nonsquare.c | 427 --- .../arm_nn_depthwise_conv_s8_core.c | 218 -- .../arm_nn_mat_mult_kernel_q7_q15.c | 186 -- .../arm_nn_mat_mult_kernel_q7_q15_reordered.c | 137 - .../arm_nn_mat_mult_kernel_s8_s16.c | 245 -- .../arm_nn_mat_mult_kernel_s8_s16_reordered.c | 201 -- .../ConvolutionFunctions/arm_nn_mat_mult_s8.c | 180 -- .../FullyConnectedFunctions/CMakeLists.txt | 21 - .../arm_fully_connected_mat_q7_vec_q15.c | 197 -- .../arm_fully_connected_mat_q7_vec_q15_opt.c | 417 --- .../arm_fully_connected_q15.c | 195 -- .../arm_fully_connected_q15_opt.c | 336 --- .../arm_fully_connected_q7.c | 200 -- .../arm_fully_connected_q7_opt.c | 495 ---- .../arm_fully_connected_s16.c | 97 - .../arm_fully_connected_s8.c | 99 - .../Source/NNSupportFunctions/CMakeLists.txt | 26 - .../arm_nn_accumulate_q7_to_q15.c | 85 - .../Source/NNSupportFunctions/arm_nn_add_q7.c | 82 - .../arm_nn_depthwise_conv_nt_t_padded_s8.c | 168 -- .../arm_nn_depthwise_conv_nt_t_s8.c | 170 -- .../arm_nn_mat_mul_core_1x_s8.c | 86 - .../arm_nn_mat_mul_core_4x_s8.c | 137 - .../arm_nn_mat_mul_kernel_s16.c | 250 -- .../arm_nn_mat_mult_nt_t_s8.c | 582 ---- .../NNSupportFunctions/arm_nn_mult_q15.c | 73 - .../NNSupportFunctions/arm_nn_mult_q7.c | 73 - .../arm_nn_vec_mat_mult_t_s16.c | 211 -- .../arm_nn_vec_mat_mult_t_s8.c | 402 --- .../arm_nn_vec_mat_mult_t_svdf_s8.c | 341 --- .../Source/NNSupportFunctions/arm_nntables.c | 203 -- .../arm_q7_to_q15_no_shift.c | 121 - .../arm_q7_to_q15_reordered_no_shift.c | 143 - .../arm_q7_to_q15_reordered_with_offset.c | 100 - .../arm_q7_to_q15_with_offset.c | 114 - .../NN/Source/PoolingFunctions/CMakeLists.txt | 24 - .../Source/PoolingFunctions/arm_avgpool_s16.c | 128 - .../Source/PoolingFunctions/arm_avgpool_s8.c | 401 --- .../PoolingFunctions/arm_max_pool_s16.c | 180 -- .../Source/PoolingFunctions/arm_max_pool_s8.c | 229 -- .../Source/PoolingFunctions/arm_pool_q7_HWC.c | 464 --- .../NN/Source/ReshapeFunctions/CMakeLists.txt | 29 - .../Source/ReshapeFunctions/arm_reshape_s8.c | 56 - .../NN/Source/SVDFunctions/CMakeLists.txt | 31 - .../NN/Source/SVDFunctions/arm_svdf_s8.c | 254 -- .../SVDFunctions/arm_svdf_state_s16_s8.c | 267 -- .../NN/Source/SoftmaxFunctions/CMakeLists.txt | 30 - .../arm_nn_softmax_common_s8.c | 141 - .../Source/SoftmaxFunctions/arm_softmax_q15.c | 118 - .../Source/SoftmaxFunctions/arm_softmax_q7.c | 107 - .../Source/SoftmaxFunctions/arm_softmax_s16.c | 122 - .../Source/SoftmaxFunctions/arm_softmax_s8.c | 261 -- .../SoftmaxFunctions/arm_softmax_s8_s16.c | 55 - .../Source/SoftmaxFunctions/arm_softmax_u8.c | 103 - .../arm_softmax_with_batch_q7.c | 74 - .../Drivers/CMSIS/RTOS/Template/cmsis_os.h | 698 ----- .../Drivers/CMSIS/RTOS2/Include/cmsis_os2.h | 756 ----- .../Drivers/CMSIS/RTOS2/Include/os_tick.h | 80 - .../Drivers/CMSIS/RTOS2/Source/os_systick.c | 133 - .../Drivers/CMSIS/RTOS2/Source/os_tick_gtim.c | 187 -- .../Drivers/CMSIS/RTOS2/Source/os_tick_ptim.c | 165 -- .../Drivers/CMSIS/RTOS2/Template/cmsis_os.h | 922 ------ .../Drivers/CMSIS/RTOS2/Template/cmsis_os1.c | 361 --- .../stm32f4xx_matlab_conf.json | 64 +- MATLAB/MCU_Wrapper/run_mex.bat | 46 +- MATLAB/upp_r2023.slx | Bin 61073 -> 61888 bytes 158 files changed, 106 insertions(+), 36689 deletions(-) delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_armcc.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_armclang.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_compiler.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_cp15.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_gcc.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_iccarm.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/core_ca.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/irq_ctrl.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Source/irq_ctrl_gic.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Config/DAP_config.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Include/DAP.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/DAP.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/DAP_vendor.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/JTAG_DP.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/SWO.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/SW_DP.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/UART.c rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => BasicMathFunctions}/BasicMathFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => BasicMathFunctions}/BasicMathFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => BayesFunctions}/BayesFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => BayesFunctions}/BayesFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => CommonTables}/CommonTables.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => CommonTables}/CommonTablesF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => ComplexMathFunctions}/ComplexMathFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => ComplexMathFunctions}/ComplexMathFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => ControllerFunctions}/ControllerFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => DistanceFunctions}/DistanceFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => DistanceFunctions}/DistanceFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => FastMathFunctions}/FastMathFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => FastMathFunctions}/FastMathFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => FilteringFunctions}/FilteringFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => FilteringFunctions}/FilteringFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => InterpolationFunctions}/InterpolationFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => InterpolationFunctions}/InterpolationFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => MatrixFunctions}/MatrixFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => MatrixFunctions}/MatrixFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => QuaternionMathFunctions}/QuaternionMathFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => SVMFunctions}/SVMFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => SVMFunctions}/SVMFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => StatisticsFunctions}/StatisticsFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => StatisticsFunctions}/StatisticsFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => SupportFunctions}/SupportFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => SupportFunctions}/SupportFunctionsF16.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => TransformFunctions}/TransformFunctions.c (100%) rename MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/{ => TransformFunctions}/TransformFunctionsF16.c (100%) delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_math_types.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_tables.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_types.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nnfunctions.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nnsupportfunctions.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_fast_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ReshapeFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/CMakeLists.txt delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS/Template/cmsis_os.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Include/cmsis_os2.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Include/os_tick.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_systick.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_tick_gtim.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_tick_ptim.c delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Template/cmsis_os.h delete mode 100644 MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Template/cmsis_os1.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_armcc.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_armcc.h deleted file mode 100644 index 0d9c374..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_armcc.h +++ /dev/null @@ -1,563 +0,0 @@ -/**************************************************************************//** - * @file cmsis_armcc.h - * @brief CMSIS compiler specific macros, functions, instructions - * @version V1.0.5 - * @date 05. May 2021 - ******************************************************************************/ -/* - * Copyright (c) 2009-2021 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CMSIS_ARMCC_H -#define __CMSIS_ARMCC_H - -#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 400677) - #error "Please use Arm Compiler Toolchain V4.0.677 or later!" -#endif - -/* CMSIS compiler control architecture macros */ -#if (defined (__TARGET_ARCH_7_A ) && (__TARGET_ARCH_7_A == 1)) - #define __ARM_ARCH_7A__ 1 -#endif - -/* CMSIS compiler specific defines */ -#ifndef __ASM - #define __ASM __asm -#endif -#ifndef __INLINE - #define __INLINE __inline -#endif -#ifndef __FORCEINLINE - #define __FORCEINLINE __forceinline -#endif -#ifndef __STATIC_INLINE - #define __STATIC_INLINE static __inline -#endif -#ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE static __forceinline -#endif -#ifndef __NO_RETURN - #define __NO_RETURN __declspec(noreturn) -#endif -#ifndef CMSIS_DEPRECATED - #define CMSIS_DEPRECATED __attribute__((deprecated)) -#endif -#ifndef __USED - #define __USED __attribute__((used)) -#endif -#ifndef __WEAK - #define __WEAK __attribute__((weak)) -#endif -#ifndef __PACKED - #define __PACKED __attribute__((packed)) -#endif -#ifndef __PACKED_STRUCT - #define __PACKED_STRUCT __packed struct -#endif -#ifndef __UNALIGNED_UINT16_WRITE - #define __UNALIGNED_UINT16_WRITE(addr, val) ((*((__packed uint16_t *)(addr))) = (val)) -#endif -#ifndef __UNALIGNED_UINT16_READ - #define __UNALIGNED_UINT16_READ(addr) (*((const __packed uint16_t *)(addr))) -#endif -#ifndef __UNALIGNED_UINT32_WRITE - #define __UNALIGNED_UINT32_WRITE(addr, val) ((*((__packed uint32_t *)(addr))) = (val)) -#endif -#ifndef __UNALIGNED_UINT32_READ - #define __UNALIGNED_UINT32_READ(addr) (*((const __packed uint32_t *)(addr))) -#endif -#ifndef __ALIGNED - #define __ALIGNED(x) __attribute__((aligned(x))) -#endif -#ifndef __PACKED - #define __PACKED __attribute__((packed)) -#endif -#ifndef __COMPILER_BARRIER - #define __COMPILER_BARRIER() __memory_changed() -#endif - -/* ########################## Core Instruction Access ######################### */ -/** - \brief No Operation - */ -#define __NOP __nop - -/** - \brief Wait For Interrupt - */ -#define __WFI __wfi - -/** - \brief Wait For Event - */ -#define __WFE __wfe - -/** - \brief Send Event - */ -#define __SEV __sev - -/** - \brief Instruction Synchronization Barrier - */ -#define __ISB() __isb(0xF) - -/** - \brief Data Synchronization Barrier - */ -#define __DSB() __dsb(0xF) - -/** - \brief Data Memory Barrier - */ -#define __DMB() __dmb(0xF) - -/** - \brief Reverse byte order (32 bit) - \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412. - \param [in] value Value to reverse - \return Reversed value - */ -#define __REV __rev - -/** - \brief Reverse byte order (16 bit) - \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856. - \param [in] value Value to reverse - \return Reversed value - */ -#ifndef __NO_EMBEDDED_ASM -__attribute__((section(".rev16_text"))) __STATIC_INLINE __ASM uint32_t __REV16(uint32_t value) -{ - rev16 r0, r0 - bx lr -} -#endif - -/** - \brief Reverse byte order (16 bit) - \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000. - \param [in] value Value to reverse - \return Reversed value - */ -#ifndef __NO_EMBEDDED_ASM -__attribute__((section(".revsh_text"))) __STATIC_INLINE __ASM int16_t __REVSH(int16_t value) -{ - revsh r0, r0 - bx lr -} -#endif - -/** - \brief Rotate Right in unsigned value (32 bit) - \param [in] op1 Value to rotate - \param [in] op2 Number of Bits to rotate - \return Rotated value - */ -#define __ROR __ror - -/** - \brief Breakpoint - \param [in] value is ignored by the processor. - If required, a debugger can use it to store additional information about the breakpoint. - */ -#define __BKPT(value) __breakpoint(value) - -/** - \brief Reverse bit order of value - \param [in] value Value to reverse - \return Reversed value - */ -#define __RBIT __rbit - -/** - \brief Count leading zeros - \param [in] value Value to count the leading zeros - \return number of leading zeros in value - */ -#define __CLZ __clz - -/** - \brief LDR Exclusive (8 bit) - \details Executes a exclusive LDR instruction for 8 bit value. - \param [in] ptr Pointer to data - \return value of type uint8_t at (*ptr) - */ -#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020) - #define __LDREXB(ptr) ((uint8_t ) __ldrex(ptr)) -#else - #define __LDREXB(ptr) _Pragma("push") _Pragma("diag_suppress 3731") ((uint8_t ) __ldrex(ptr)) _Pragma("pop") -#endif - -/** - \brief LDR Exclusive (16 bit) - \details Executes a exclusive LDR instruction for 16 bit values. - \param [in] ptr Pointer to data - \return value of type uint16_t at (*ptr) - */ -#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020) - #define __LDREXH(ptr) ((uint16_t) __ldrex(ptr)) -#else - #define __LDREXH(ptr) _Pragma("push") _Pragma("diag_suppress 3731") ((uint16_t) __ldrex(ptr)) _Pragma("pop") -#endif - -/** - \brief LDR Exclusive (32 bit) - \details Executes a exclusive LDR instruction for 32 bit values. - \param [in] ptr Pointer to data - \return value of type uint32_t at (*ptr) - */ -#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020) - #define __LDREXW(ptr) ((uint32_t ) __ldrex(ptr)) -#else - #define __LDREXW(ptr) _Pragma("push") _Pragma("diag_suppress 3731") ((uint32_t ) __ldrex(ptr)) _Pragma("pop") -#endif - -/** - \brief STR Exclusive (8 bit) - \details Executes a exclusive STR instruction for 8 bit values. - \param [in] value Value to store - \param [in] ptr Pointer to location - \return 0 Function succeeded - \return 1 Function failed - */ -#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020) - #define __STREXB(value, ptr) __strex(value, ptr) -#else - #define __STREXB(value, ptr) _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr) _Pragma("pop") -#endif - -/** - \brief STR Exclusive (16 bit) - \details Executes a exclusive STR instruction for 16 bit values. - \param [in] value Value to store - \param [in] ptr Pointer to location - \return 0 Function succeeded - \return 1 Function failed - */ -#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020) - #define __STREXH(value, ptr) __strex(value, ptr) -#else - #define __STREXH(value, ptr) _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr) _Pragma("pop") -#endif - -/** - \brief STR Exclusive (32 bit) - \details Executes a exclusive STR instruction for 32 bit values. - \param [in] value Value to store - \param [in] ptr Pointer to location - \return 0 Function succeeded - \return 1 Function failed - */ -#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020) - #define __STREXW(value, ptr) __strex(value, ptr) -#else - #define __STREXW(value, ptr) _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr) _Pragma("pop") -#endif - -/** - \brief Remove the exclusive lock - \details Removes the exclusive lock which is created by LDREX. - */ -#define __CLREX __clrex - - -/** - \brief Signed Saturate - \details Saturates a signed value. - \param [in] value Value to be saturated - \param [in] sat Bit position to saturate to (1..32) - \return Saturated value - */ -#define __SSAT __ssat - -/** - \brief Unsigned Saturate - \details Saturates an unsigned value. - \param [in] value Value to be saturated - \param [in] sat Bit position to saturate to (0..31) - \return Saturated value - */ -#define __USAT __usat - -/* ########################### Core Function Access ########################### */ - -/** - \brief Enable IRQ Interrupts - \details Enables IRQ interrupts by clearing the I-bit in the CPSR. - Can only be executed in Privileged modes. - */ -/* intrinsic void __enable_irq(); */ - -/** - \brief Disable IRQ Interrupts - \details Disables IRQ interrupts by setting the I-bit in the CPSR. - Can only be executed in Privileged modes. - */ -/* intrinsic void __disable_irq(void); */ - -/** - \brief Enable FIQ - \details Enables FIQ interrupts by clearing the F-bit in the CPSR. - Can only be executed in Privileged modes. - */ -#define __enable_fault_irq __enable_fiq - -/** - \brief Disable FIQ - \details Disables FIQ interrupts by setting the F-bit in the CPSR. - Can only be executed in Privileged modes. - */ -#define __disable_fault_irq __disable_fiq - -/** - \brief Get FPSCR (Floating Point Status/Control) - \return Floating Point Status/Control register value - */ -__STATIC_INLINE uint32_t __get_FPSCR(void) -{ -#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \ - (defined (__FPU_USED ) && (__FPU_USED == 1U)) ) - register uint32_t __regfpscr __ASM("fpscr"); - return(__regfpscr); -#else - return(0U); -#endif -} - -/** - \brief Set FPSCR (Floating Point Status/Control) - \param [in] fpscr Floating Point Status/Control value to set - */ -__STATIC_INLINE void __set_FPSCR(uint32_t fpscr) -{ -#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \ - (defined (__FPU_USED ) && (__FPU_USED == 1U)) ) - register uint32_t __regfpscr __ASM("fpscr"); - __regfpscr = (fpscr); -#else - (void)fpscr; -#endif -} - -/** \brief Get CPSR (Current Program Status Register) - \return CPSR Register value - */ -__STATIC_INLINE uint32_t __get_CPSR(void) -{ - register uint32_t __regCPSR __ASM("cpsr"); - return(__regCPSR); -} - - -/** \brief Set CPSR (Current Program Status Register) - \param [in] cpsr CPSR value to set - */ -__STATIC_INLINE void __set_CPSR(uint32_t cpsr) -{ - register uint32_t __regCPSR __ASM("cpsr"); - __regCPSR = cpsr; -} - -/** \brief Get Mode - \return Processor Mode - */ -__STATIC_INLINE uint32_t __get_mode(void) -{ - return (__get_CPSR() & 0x1FU); -} - -/** \brief Set Mode - \param [in] mode Mode value to set - */ -__STATIC_INLINE __ASM void __set_mode(uint32_t mode) -{ - MOV r1, lr - MSR CPSR_C, r0 - BX r1 -} - -/** \brief Get Stack Pointer - \return Stack Pointer - */ -__STATIC_INLINE __ASM uint32_t __get_SP(void) -{ - MOV r0, sp - BX lr -} - -/** \brief Set Stack Pointer - \param [in] stack Stack Pointer value to set - */ -__STATIC_INLINE __ASM void __set_SP(uint32_t stack) -{ - MOV sp, r0 - BX lr -} - - -/** \brief Get USR/SYS Stack Pointer - \return USR/SYSStack Pointer - */ -__STATIC_INLINE __ASM uint32_t __get_SP_usr(void) -{ - ARM - PRESERVE8 - - MRS R1, CPSR - CPS #0x1F ;no effect in USR mode - MOV R0, SP - MSR CPSR_c, R1 ;no effect in USR mode - ISB - BX LR -} - -/** \brief Set USR/SYS Stack Pointer - \param [in] topOfProcStack USR/SYS Stack Pointer value to set - */ -__STATIC_INLINE __ASM void __set_SP_usr(uint32_t topOfProcStack) -{ - ARM - PRESERVE8 - - MRS R1, CPSR - CPS #0x1F ;no effect in USR mode - MOV SP, R0 - MSR CPSR_c, R1 ;no effect in USR mode - ISB - BX LR -} - -/** \brief Get FPEXC (Floating Point Exception Control Register) - \return Floating Point Exception Control Register value - */ -__STATIC_INLINE uint32_t __get_FPEXC(void) -{ -#if (__FPU_PRESENT == 1) - register uint32_t __regfpexc __ASM("fpexc"); - return(__regfpexc); -#else - return(0); -#endif -} - -/** \brief Set FPEXC (Floating Point Exception Control Register) - \param [in] fpexc Floating Point Exception Control value to set - */ -__STATIC_INLINE void __set_FPEXC(uint32_t fpexc) -{ -#if (__FPU_PRESENT == 1) - register uint32_t __regfpexc __ASM("fpexc"); - __regfpexc = (fpexc); -#endif -} - -/* - * Include common core functions to access Coprocessor 15 registers - */ - -#define __get_CP(cp, op1, Rt, CRn, CRm, op2) do { register volatile uint32_t tmp __ASM("cp" # cp ":" # op1 ":c" # CRn ":c" # CRm ":" # op2); (Rt) = tmp; } while(0) -#define __set_CP(cp, op1, Rt, CRn, CRm, op2) do { register volatile uint32_t tmp __ASM("cp" # cp ":" # op1 ":c" # CRn ":c" # CRm ":" # op2); tmp = (Rt); } while(0) -#define __get_CP64(cp, op1, Rt, CRm) \ - do { \ - uint32_t ltmp, htmp; \ - __ASM volatile("MRRC p" # cp ", " # op1 ", ltmp, htmp, c" # CRm); \ - (Rt) = ((((uint64_t)htmp) << 32U) | ((uint64_t)ltmp)); \ - } while(0) - -#define __set_CP64(cp, op1, Rt, CRm) \ - do { \ - const uint64_t tmp = (Rt); \ - const uint32_t ltmp = (uint32_t)(tmp); \ - const uint32_t htmp = (uint32_t)(tmp >> 32U); \ - __ASM volatile("MCRR p" # cp ", " # op1 ", ltmp, htmp, c" # CRm); \ - } while(0) - -#include "cmsis_cp15.h" - -/** \brief Enable Floating Point Unit - - Critical section, called from undef handler, so systick is disabled - */ -__STATIC_INLINE __ASM void __FPU_Enable(void) -{ - ARM - - //Permit access to VFP/NEON, registers by modifying CPACR - MRC p15,0,R1,c1,c0,2 - ORR R1,R1,#0x00F00000 - MCR p15,0,R1,c1,c0,2 - - //Ensure that subsequent instructions occur in the context of VFP/NEON access permitted - ISB - - //Enable VFP/NEON - VMRS R1,FPEXC - ORR R1,R1,#0x40000000 - VMSR FPEXC,R1 - - //Initialise VFP/NEON registers to 0 - MOV R2,#0 - - //Initialise D16 registers to 0 - VMOV D0, R2,R2 - VMOV D1, R2,R2 - VMOV D2, R2,R2 - VMOV D3, R2,R2 - VMOV D4, R2,R2 - VMOV D5, R2,R2 - VMOV D6, R2,R2 - VMOV D7, R2,R2 - VMOV D8, R2,R2 - VMOV D9, R2,R2 - VMOV D10,R2,R2 - VMOV D11,R2,R2 - VMOV D12,R2,R2 - VMOV D13,R2,R2 - VMOV D14,R2,R2 - VMOV D15,R2,R2 - - IF {TARGET_FEATURE_EXTENSION_REGISTER_COUNT} == 32 - //Initialise D32 registers to 0 - VMOV D16,R2,R2 - VMOV D17,R2,R2 - VMOV D18,R2,R2 - VMOV D19,R2,R2 - VMOV D20,R2,R2 - VMOV D21,R2,R2 - VMOV D22,R2,R2 - VMOV D23,R2,R2 - VMOV D24,R2,R2 - VMOV D25,R2,R2 - VMOV D26,R2,R2 - VMOV D27,R2,R2 - VMOV D28,R2,R2 - VMOV D29,R2,R2 - VMOV D30,R2,R2 - VMOV D31,R2,R2 - ENDIF - - //Initialise FPSCR to a known state - VMRS R1,FPSCR - LDR R2,=0x00086060 //Mask off all bits that do not have to be preserved. Non-preserved bits can/should be zero. - AND R1,R1,R2 - VMSR FPSCR,R1 - - BX LR -} - -#endif /* __CMSIS_ARMCC_H */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_armclang.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_armclang.h deleted file mode 100644 index e64eba9..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_armclang.h +++ /dev/null @@ -1,614 +0,0 @@ -/**************************************************************************//** - * @file cmsis_armclang.h - * @brief CMSIS compiler specific macros, functions, instructions - * @version V1.2.1 - * @date 05. May 2021 - ******************************************************************************/ -/* - * Copyright (c) 2009-2021 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CMSIS_ARMCLANG_H -#define __CMSIS_ARMCLANG_H - -#pragma clang system_header /* treat file as system include file */ - -/* CMSIS compiler specific defines */ -#ifndef __ASM - #define __ASM __asm -#endif -#ifndef __INLINE - #define __INLINE __inline -#endif -#ifndef __FORCEINLINE - #define __FORCEINLINE __attribute__((always_inline)) -#endif -#ifndef __STATIC_INLINE - #define __STATIC_INLINE static __inline -#endif -#ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE __attribute__((always_inline)) static __inline -#endif -#ifndef __NO_RETURN - #define __NO_RETURN __attribute__((__noreturn__)) -#endif -#ifndef CMSIS_DEPRECATED - #define CMSIS_DEPRECATED __attribute__((deprecated)) -#endif -#ifndef __USED - #define __USED __attribute__((used)) -#endif -#ifndef __WEAK - #define __WEAK __attribute__((weak)) -#endif -#ifndef __PACKED - #define __PACKED __attribute__((packed, aligned(1))) -#endif -#ifndef __PACKED_STRUCT - #define __PACKED_STRUCT struct __attribute__((packed, aligned(1))) -#endif -#ifndef __UNALIGNED_UINT16_WRITE - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wpacked" -/*lint -esym(9058, T_UINT16_WRITE)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT16_WRITE */ - __PACKED_STRUCT T_UINT16_WRITE { uint16_t v; }; - #pragma clang diagnostic pop - #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val)) -#endif -#ifndef __UNALIGNED_UINT16_READ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wpacked" -/*lint -esym(9058, T_UINT16_READ)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT16_READ */ - __PACKED_STRUCT T_UINT16_READ { uint16_t v; }; - #pragma clang diagnostic pop - #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) -#endif -#ifndef __UNALIGNED_UINT32_WRITE - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wpacked" -/*lint -esym(9058, T_UINT32_WRITE)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT32_WRITE */ - __PACKED_STRUCT T_UINT32_WRITE { uint32_t v; }; - #pragma clang diagnostic pop - #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) -#endif -#ifndef __UNALIGNED_UINT32_READ - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wpacked" - __PACKED_STRUCT T_UINT32_READ { uint32_t v; }; - #pragma clang diagnostic pop - #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) -#endif -#ifndef __ALIGNED - #define __ALIGNED(x) __attribute__((aligned(x))) -#endif -#ifndef __PACKED - #define __PACKED __attribute__((packed)) -#endif -#ifndef __COMPILER_BARRIER - #define __COMPILER_BARRIER() __ASM volatile("":::"memory") -#endif - -/* ########################## Core Instruction Access ######################### */ -/** - \brief No Operation - */ -#define __NOP __builtin_arm_nop - -/** - \brief Wait For Interrupt - */ -#define __WFI __builtin_arm_wfi - -/** - \brief Wait For Event - */ -#define __WFE __builtin_arm_wfe - -/** - \brief Send Event - */ -#define __SEV __builtin_arm_sev - -/** - \brief Instruction Synchronization Barrier - */ -#define __ISB() __builtin_arm_isb(0xF) - -/** - \brief Data Synchronization Barrier - */ -#define __DSB() __builtin_arm_dsb(0xF) - -/** - \brief Data Memory Barrier - */ -#define __DMB() __builtin_arm_dmb(0xF) - -/** - \brief Reverse byte order (32 bit) - \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412. - \param [in] value Value to reverse - \return Reversed value - */ -#define __REV(value) __builtin_bswap32(value) - -/** - \brief Reverse byte order (16 bit) - \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856. - \param [in] value Value to reverse - \return Reversed value - */ -#define __REV16(value) __ROR(__REV(value), 16) - - -/** - \brief Reverse byte order (16 bit) - \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000. - \param [in] value Value to reverse - \return Reversed value - */ -#define __REVSH(value) (int16_t)__builtin_bswap16(value) - - -/** - \brief Rotate Right in unsigned value (32 bit) - \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits. - \param [in] op1 Value to rotate - \param [in] op2 Number of Bits to rotate - \return Rotated value - */ -__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2) -{ - op2 %= 32U; - if (op2 == 0U) - { - return op1; - } - return (op1 >> op2) | (op1 << (32U - op2)); -} - - -/** - \brief Breakpoint - \param [in] value is ignored by the processor. - If required, a debugger can use it to store additional information about the breakpoint. - */ -#define __BKPT(value) __ASM volatile ("bkpt "#value) - -/** - \brief Reverse bit order of value - \param [in] value Value to reverse - \return Reversed value - */ -#define __RBIT __builtin_arm_rbit - -/** - \brief Count leading zeros - \param [in] value Value to count the leading zeros - \return number of leading zeros in value - */ -__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value) -{ - /* Even though __builtin_clz produces a CLZ instruction on ARM, formally - __builtin_clz(0) is undefined behaviour, so handle this case specially. - This guarantees ARM-compatible results if happening to compile on a non-ARM - target, and ensures the compiler doesn't decide to activate any - optimisations using the logic "value was passed to __builtin_clz, so it - is non-zero". - ARM Compiler 6.10 and possibly earlier will optimise this test away, leaving a - single CLZ instruction. - */ - if (value == 0U) - { - return 32U; - } - return __builtin_clz(value); -} - -/** - \brief LDR Exclusive (8 bit) - \details Executes a exclusive LDR instruction for 8 bit value. - \param [in] ptr Pointer to data - \return value of type uint8_t at (*ptr) - */ -#define __LDREXB (uint8_t)__builtin_arm_ldrex - - -/** - \brief LDR Exclusive (16 bit) - \details Executes a exclusive LDR instruction for 16 bit values. - \param [in] ptr Pointer to data - \return value of type uint16_t at (*ptr) - */ -#define __LDREXH (uint16_t)__builtin_arm_ldrex - -/** - \brief LDR Exclusive (32 bit) - \details Executes a exclusive LDR instruction for 32 bit values. - \param [in] ptr Pointer to data - \return value of type uint32_t at (*ptr) - */ -#define __LDREXW (uint32_t)__builtin_arm_ldrex - -/** - \brief STR Exclusive (8 bit) - \details Executes a exclusive STR instruction for 8 bit values. - \param [in] value Value to store - \param [in] ptr Pointer to location - \return 0 Function succeeded - \return 1 Function failed - */ -#define __STREXB (uint32_t)__builtin_arm_strex - -/** - \brief STR Exclusive (16 bit) - \details Executes a exclusive STR instruction for 16 bit values. - \param [in] value Value to store - \param [in] ptr Pointer to location - \return 0 Function succeeded - \return 1 Function failed - */ -#define __STREXH (uint32_t)__builtin_arm_strex - -/** - \brief STR Exclusive (32 bit) - \details Executes a exclusive STR instruction for 32 bit values. - \param [in] value Value to store - \param [in] ptr Pointer to location - \return 0 Function succeeded - \return 1 Function failed - */ -#define __STREXW (uint32_t)__builtin_arm_strex - -/** - \brief Remove the exclusive lock - \details Removes the exclusive lock which is created by LDREX. - */ -#define __CLREX __builtin_arm_clrex - -/** - \brief Signed Saturate - \details Saturates a signed value. - \param [in] value Value to be saturated - \param [in] sat Bit position to saturate to (1..32) - \return Saturated value - */ -#define __SSAT __builtin_arm_ssat - -/** - \brief Unsigned Saturate - \details Saturates an unsigned value. - \param [in] value Value to be saturated - \param [in] sat Bit position to saturate to (0..31) - \return Saturated value - */ -#define __USAT __builtin_arm_usat - -/* ################### Compiler specific Intrinsics ########################### */ -/** \defgroup CMSIS_SIMD_intrinsics CMSIS SIMD Intrinsics - Access to dedicated SIMD instructions - @{ -*/ - -#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)) - -#define __SADD8 __builtin_arm_sadd8 -#define __SADD16 __builtin_arm_sadd16 -#define __QADD8 __builtin_arm_qadd8 -#define __QSUB8 __builtin_arm_qsub8 -#define __QADD16 __builtin_arm_qadd16 -#define __SHADD16 __builtin_arm_shadd16 -#define __QSUB16 __builtin_arm_qsub16 -#define __SHSUB16 __builtin_arm_shsub16 -#define __QASX __builtin_arm_qasx -#define __SHASX __builtin_arm_shasx -#define __QSAX __builtin_arm_qsax -#define __SHSAX __builtin_arm_shsax -#define __SXTB16 __builtin_arm_sxtb16 -#define __SMUAD __builtin_arm_smuad -#define __SMUADX __builtin_arm_smuadx -#define __SMLAD __builtin_arm_smlad -#define __SMLADX __builtin_arm_smladx -#define __SMLALD __builtin_arm_smlald -#define __SMLALDX __builtin_arm_smlaldx -#define __SMUSD __builtin_arm_smusd -#define __SMUSDX __builtin_arm_smusdx -#define __SMLSDX __builtin_arm_smlsdx -#define __USAT16 __builtin_arm_usat16 -#define __SSUB8 __builtin_arm_ssub8 -#define __SXTB16 __builtin_arm_sxtb16 -#define __SXTAB16 __builtin_arm_sxtab16 - - -__STATIC_FORCEINLINE int32_t __QADD( int32_t op1, int32_t op2) -{ - int32_t result; - - __ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE int32_t __QSUB( int32_t op1, int32_t op2) -{ - int32_t result; - - __ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -#define __PKHBT(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \ - ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL) ) - -#define __PKHTB(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \ - ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL) ) - -__STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3) -{ - int32_t result; - - __ASM volatile ("smmla %0, %1, %2, %3" : "=r" (result): "r" (op1), "r" (op2), "r" (op3) ); - return(result); -} - -#endif /* (__ARM_FEATURE_DSP == 1) */ - -/* ########################### Core Function Access ########################### */ - -/** - \brief Enable IRQ Interrupts - \details Enables IRQ interrupts by clearing the I-bit in the CPSR. - Can only be executed in Privileged modes. - */ -__STATIC_FORCEINLINE void __enable_irq(void) -{ - __ASM volatile ("cpsie i" : : : "memory"); -} - -/** - \brief Disable IRQ Interrupts - \details Disables IRQ interrupts by setting the I-bit in the CPSR. - Can only be executed in Privileged modes. - */ -__STATIC_FORCEINLINE void __disable_irq(void) -{ - __ASM volatile ("cpsid i" : : : "memory"); -} - -/** - \brief Enable FIQ - \details Enables FIQ interrupts by clearing the F-bit in the CPSR. - Can only be executed in Privileged modes. - */ -__STATIC_FORCEINLINE void __enable_fault_irq(void) -{ - __ASM volatile ("cpsie f" : : : "memory"); -} - -/** - \brief Disable FIQ - \details Disables FIQ interrupts by setting the F-bit in the CPSR. - Can only be executed in Privileged modes. - */ -__STATIC_FORCEINLINE void __disable_fault_irq(void) -{ - __ASM volatile ("cpsid f" : : : "memory"); -} - -/** - \brief Get FPSCR - \details Returns the current value of the Floating Point Status/Control register. - \return Floating Point Status/Control register value - */ -#define __get_FPSCR __builtin_arm_get_fpscr - -/** - \brief Set FPSCR - \details Assigns the given value to the Floating Point Status/Control register. - \param [in] fpscr Floating Point Status/Control value to set - */ -#define __set_FPSCR __builtin_arm_set_fpscr - -/** \brief Get CPSR Register - \return CPSR Register value - */ -__STATIC_FORCEINLINE uint32_t __get_CPSR(void) -{ - uint32_t result; - __ASM volatile("MRS %0, cpsr" : "=r" (result) ); - return(result); -} - -/** \brief Set CPSR Register - \param [in] cpsr CPSR value to set - */ -__STATIC_FORCEINLINE void __set_CPSR(uint32_t cpsr) -{ - __ASM volatile ("MSR cpsr, %0" : : "r" (cpsr) : "cc", "memory"); -} - -/** \brief Get Mode - \return Processor Mode - */ -__STATIC_FORCEINLINE uint32_t __get_mode(void) -{ - return (__get_CPSR() & 0x1FU); -} - -/** \brief Set Mode - \param [in] mode Mode value to set - */ -__STATIC_FORCEINLINE void __set_mode(uint32_t mode) -{ - __ASM volatile("MSR cpsr_c, %0" : : "r" (mode) : "memory"); -} - -/** \brief Get Stack Pointer - \return Stack Pointer value - */ -__STATIC_FORCEINLINE uint32_t __get_SP(void) -{ - uint32_t result; - __ASM volatile("MOV %0, sp" : "=r" (result) : : "memory"); - return result; -} - -/** \brief Set Stack Pointer - \param [in] stack Stack Pointer value to set - */ -__STATIC_FORCEINLINE void __set_SP(uint32_t stack) -{ - __ASM volatile("MOV sp, %0" : : "r" (stack) : "memory"); -} - -/** \brief Get USR/SYS Stack Pointer - \return USR/SYS Stack Pointer value - */ -__STATIC_FORCEINLINE uint32_t __get_SP_usr(void) -{ - uint32_t cpsr; - uint32_t result; - __ASM volatile( - "MRS %0, cpsr \n" - "CPS #0x1F \n" // no effect in USR mode - "MOV %1, sp \n" - "MSR cpsr_c, %0 \n" // no effect in USR mode - "ISB" : "=r"(cpsr), "=r"(result) : : "memory" - ); - return result; -} - -/** \brief Set USR/SYS Stack Pointer - \param [in] topOfProcStack USR/SYS Stack Pointer value to set - */ -__STATIC_FORCEINLINE void __set_SP_usr(uint32_t topOfProcStack) -{ - uint32_t cpsr; - __ASM volatile( - "MRS %0, cpsr \n" - "CPS #0x1F \n" // no effect in USR mode - "MOV sp, %1 \n" - "MSR cpsr_c, %0 \n" // no effect in USR mode - "ISB" : "=r"(cpsr) : "r" (topOfProcStack) : "memory" - ); -} - -/** \brief Get FPEXC - \return Floating Point Exception Control register value - */ -__STATIC_FORCEINLINE uint32_t __get_FPEXC(void) -{ -#if (__FPU_PRESENT == 1) - uint32_t result; - __ASM volatile("VMRS %0, fpexc" : "=r" (result) : : "memory"); - return(result); -#else - return(0); -#endif -} - -/** \brief Set FPEXC - \param [in] fpexc Floating Point Exception Control value to set - */ -__STATIC_FORCEINLINE void __set_FPEXC(uint32_t fpexc) -{ -#if (__FPU_PRESENT == 1) - __ASM volatile ("VMSR fpexc, %0" : : "r" (fpexc) : "memory"); -#endif -} - -/* - * Include common core functions to access Coprocessor 15 registers - */ - -#define __get_CP(cp, op1, Rt, CRn, CRm, op2) __ASM volatile("MRC p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : "=r" (Rt) : : "memory" ) -#define __set_CP(cp, op1, Rt, CRn, CRm, op2) __ASM volatile("MCR p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : : "r" (Rt) : "memory" ) -#define __get_CP64(cp, op1, Rt, CRm) __ASM volatile("MRRC p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : "=r" (Rt) : : "memory" ) -#define __set_CP64(cp, op1, Rt, CRm) __ASM volatile("MCRR p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : : "r" (Rt) : "memory" ) - -#include "cmsis_cp15.h" - -/** \brief Enable Floating Point Unit - - Critical section, called from undef handler, so systick is disabled - */ -__STATIC_INLINE void __FPU_Enable(void) -{ - __ASM volatile( - //Permit access to VFP/NEON, registers by modifying CPACR - " MRC p15,0,R1,c1,c0,2 \n" - " ORR R1,R1,#0x00F00000 \n" - " MCR p15,0,R1,c1,c0,2 \n" - - //Ensure that subsequent instructions occur in the context of VFP/NEON access permitted - " ISB \n" - - //Enable VFP/NEON - " VMRS R1,FPEXC \n" - " ORR R1,R1,#0x40000000 \n" - " VMSR FPEXC,R1 \n" - - //Initialise VFP/NEON registers to 0 - " MOV R2,#0 \n" - - //Initialise D16 registers to 0 - " VMOV D0, R2,R2 \n" - " VMOV D1, R2,R2 \n" - " VMOV D2, R2,R2 \n" - " VMOV D3, R2,R2 \n" - " VMOV D4, R2,R2 \n" - " VMOV D5, R2,R2 \n" - " VMOV D6, R2,R2 \n" - " VMOV D7, R2,R2 \n" - " VMOV D8, R2,R2 \n" - " VMOV D9, R2,R2 \n" - " VMOV D10,R2,R2 \n" - " VMOV D11,R2,R2 \n" - " VMOV D12,R2,R2 \n" - " VMOV D13,R2,R2 \n" - " VMOV D14,R2,R2 \n" - " VMOV D15,R2,R2 \n" - -#if (defined(__ARM_NEON) && (__ARM_NEON == 1)) - //Initialise D32 registers to 0 - " VMOV D16,R2,R2 \n" - " VMOV D17,R2,R2 \n" - " VMOV D18,R2,R2 \n" - " VMOV D19,R2,R2 \n" - " VMOV D20,R2,R2 \n" - " VMOV D21,R2,R2 \n" - " VMOV D22,R2,R2 \n" - " VMOV D23,R2,R2 \n" - " VMOV D24,R2,R2 \n" - " VMOV D25,R2,R2 \n" - " VMOV D26,R2,R2 \n" - " VMOV D27,R2,R2 \n" - " VMOV D28,R2,R2 \n" - " VMOV D29,R2,R2 \n" - " VMOV D30,R2,R2 \n" - " VMOV D31,R2,R2 \n" -#endif - - //Initialise FPSCR to a known state - " VMRS R1,FPSCR \n" - " LDR R2,=0x00086060 \n" //Mask off all bits that do not have to be preserved. Non-preserved bits can/should be zero. - " AND R1,R1,R2 \n" - " VMSR FPSCR,R1 " - : : : "cc", "r1", "r2" - ); -} - -#endif /* __CMSIS_ARMCLANG_H */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_compiler.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_compiler.h deleted file mode 100644 index dfd07a2..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_compiler.h +++ /dev/null @@ -1,213 +0,0 @@ -/**************************************************************************//** - * @file cmsis_compiler.h - * @brief CMSIS compiler specific macros, functions, instructions - * @version V1.0.2 - * @date 10. January 2018 - ******************************************************************************/ -/* - * Copyright (c) 2009-2018 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CMSIS_COMPILER_H -#define __CMSIS_COMPILER_H - -#include - -/* - * Arm Compiler 4/5 - */ -#if defined ( __CC_ARM ) - #include "cmsis_armcc.h" - - -/* - * Arm Compiler 6 (armclang) - */ -#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) - #include "cmsis_armclang.h" - - -/* - * GNU Compiler - */ -#elif defined ( __GNUC__ ) - #include "cmsis_gcc.h" - - -/* - * IAR Compiler - */ -#elif defined ( __ICCARM__ ) - #include "cmsis_iccarm.h" - - -/* - * TI Arm Compiler - */ -#elif defined ( __TI_ARM__ ) - #include - - #ifndef __ASM - #define __ASM __asm - #endif - #ifndef __INLINE - #define __INLINE inline - #endif - #ifndef __STATIC_INLINE - #define __STATIC_INLINE static inline - #endif - #ifndef __STATIC_INLINE - #define __STATIC_INLINE static inline - #endif - #ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE __STATIC_INLINE - #endif - #ifndef __NO_RETURN - #define __NO_RETURN __attribute__((noreturn)) - #endif - #ifndef CMSIS_DEPRECATED - #define CMSIS_DEPRECATED __attribute__((deprecated)) - #endif - #ifndef __USED - #define __USED __attribute__((used)) - #endif - #ifndef __WEAK - #define __WEAK __attribute__((weak)) - #endif - #ifndef __UNALIGNED_UINT32 - struct __attribute__((packed)) T_UINT32 { uint32_t v; }; - #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) - #endif - #ifndef __ALIGNED - #define __ALIGNED(x) __attribute__((aligned(x))) - #endif - #ifndef __PACKED - #define __PACKED __attribute__((packed)) - #endif - #ifndef __COMPILER_BARRIER - #warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored. - #define __COMPILER_BARRIER() (void)0 - #endif - - -/* - * TASKING Compiler - */ -#elif defined ( __TASKING__ ) - /* - * The CMSIS functions have been implemented as intrinsics in the compiler. - * Please use "carm -?i" to get an up to date list of all intrinsics, - * Including the CMSIS ones. - */ - - #ifndef __ASM - #define __ASM __asm - #endif - #ifndef __INLINE - #define __INLINE inline - #endif - #ifndef __STATIC_INLINE - #define __STATIC_INLINE static inline - #endif - #ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE __STATIC_INLINE - #endif - #ifndef __NO_RETURN - #define __NO_RETURN __attribute__((noreturn)) - #endif - #ifndef CMSIS_DEPRECATED - #define CMSIS_DEPRECATED __attribute__((deprecated)) - #endif - #ifndef __USED - #define __USED __attribute__((used)) - #endif - #ifndef __WEAK - #define __WEAK __attribute__((weak)) - #endif - #ifndef __UNALIGNED_UINT32 - struct __packed__ T_UINT32 { uint32_t v; }; - #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) - #endif - #ifndef __ALIGNED - #define __ALIGNED(x) __align(x) - #endif - #ifndef __PACKED - #define __PACKED __packed__ - #endif - #ifndef __COMPILER_BARRIER - #warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored. - #define __COMPILER_BARRIER() (void)0 - #endif - - -/* - * COSMIC Compiler - */ -#elif defined ( __CSMC__ ) - #include - - #ifndef __ASM - #define __ASM _asm - #endif - #ifndef __INLINE - #define __INLINE inline - #endif - #ifndef __STATIC_INLINE - #define __STATIC_INLINE static inline - #endif - #ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE __STATIC_INLINE - #endif - #ifndef __NO_RETURN - // NO RETURN is automatically detected hence no warning here - #define __NO_RETURN - #endif - #ifndef __USED - #warning No compiler specific solution for __USED. __USED is ignored. - #define __USED - #endif - #ifndef CMSIS_DEPRECATED - #warning No compiler specific solution for CMSIS_DEPRECATED. CMSIS_DEPRECATED is ignored. - #define CMSIS_DEPRECATED - #endif - #ifndef __WEAK - #define __WEAK __weak - #endif - #ifndef __UNALIGNED_UINT32 - @packed struct T_UINT32 { uint32_t v; }; - #define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v) - #endif - #ifndef __ALIGNED - #warning No compiler specific solution for __ALIGNED. __ALIGNED is ignored. - #define __ALIGNED(x) - #endif - #ifndef __PACKED - #define __PACKED @packed - #endif - #ifndef __COMPILER_BARRIER - #warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored. - #define __COMPILER_BARRIER() (void)0 - #endif - - -#else - #error Unknown compiler. -#endif - - -#endif /* __CMSIS_COMPILER_H */ - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_cp15.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_cp15.h deleted file mode 100644 index 891bec2..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_cp15.h +++ /dev/null @@ -1,514 +0,0 @@ -/**************************************************************************//** - * @file cmsis_cp15.h - * @brief CMSIS compiler specific macros, functions, instructions - * @version V1.0.1 - * @date 07. Sep 2017 - ******************************************************************************/ -/* - * Copyright (c) 2009-2017 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if defined ( __ICCARM__ ) - #pragma system_include /* treat file as system include file for MISRA check */ -#elif defined (__clang__) - #pragma clang system_header /* treat file as system include file */ -#endif - -#ifndef __CMSIS_CP15_H -#define __CMSIS_CP15_H - -/** \brief Get ACTLR - \return Auxiliary Control register value - */ -__STATIC_FORCEINLINE uint32_t __get_ACTLR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 1, 0, 1); - return(result); -} - -/** \brief Set ACTLR - \param [in] actlr Auxiliary Control value to set - */ -__STATIC_FORCEINLINE void __set_ACTLR(uint32_t actlr) -{ - __set_CP(15, 0, actlr, 1, 0, 1); -} - -/** \brief Get CPACR - \return Coprocessor Access Control register value - */ -__STATIC_FORCEINLINE uint32_t __get_CPACR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 1, 0, 2); - return result; -} - -/** \brief Set CPACR - \param [in] cpacr Coprocessor Access Control value to set - */ -__STATIC_FORCEINLINE void __set_CPACR(uint32_t cpacr) -{ - __set_CP(15, 0, cpacr, 1, 0, 2); -} - -/** \brief Get DFSR - \return Data Fault Status Register value - */ -__STATIC_FORCEINLINE uint32_t __get_DFSR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 5, 0, 0); - return result; -} - -/** \brief Set DFSR - \param [in] dfsr Data Fault Status value to set - */ -__STATIC_FORCEINLINE void __set_DFSR(uint32_t dfsr) -{ - __set_CP(15, 0, dfsr, 5, 0, 0); -} - -/** \brief Get IFSR - \return Instruction Fault Status Register value - */ -__STATIC_FORCEINLINE uint32_t __get_IFSR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 5, 0, 1); - return result; -} - -/** \brief Set IFSR - \param [in] ifsr Instruction Fault Status value to set - */ -__STATIC_FORCEINLINE void __set_IFSR(uint32_t ifsr) -{ - __set_CP(15, 0, ifsr, 5, 0, 1); -} - -/** \brief Get ISR - \return Interrupt Status Register value - */ -__STATIC_FORCEINLINE uint32_t __get_ISR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 12, 1, 0); - return result; -} - -/** \brief Get CBAR - \return Configuration Base Address register value - */ -__STATIC_FORCEINLINE uint32_t __get_CBAR(void) -{ - uint32_t result; - __get_CP(15, 4, result, 15, 0, 0); - return result; -} - -/** \brief Get TTBR0 - - This function returns the value of the Translation Table Base Register 0. - - \return Translation Table Base Register 0 value - */ -__STATIC_FORCEINLINE uint32_t __get_TTBR0(void) -{ - uint32_t result; - __get_CP(15, 0, result, 2, 0, 0); - return result; -} - -/** \brief Set TTBR0 - - This function assigns the given value to the Translation Table Base Register 0. - - \param [in] ttbr0 Translation Table Base Register 0 value to set - */ -__STATIC_FORCEINLINE void __set_TTBR0(uint32_t ttbr0) -{ - __set_CP(15, 0, ttbr0, 2, 0, 0); -} - -/** \brief Get DACR - - This function returns the value of the Domain Access Control Register. - - \return Domain Access Control Register value - */ -__STATIC_FORCEINLINE uint32_t __get_DACR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 3, 0, 0); - return result; -} - -/** \brief Set DACR - - This function assigns the given value to the Domain Access Control Register. - - \param [in] dacr Domain Access Control Register value to set - */ -__STATIC_FORCEINLINE void __set_DACR(uint32_t dacr) -{ - __set_CP(15, 0, dacr, 3, 0, 0); -} - -/** \brief Set SCTLR - - This function assigns the given value to the System Control Register. - - \param [in] sctlr System Control Register value to set - */ -__STATIC_FORCEINLINE void __set_SCTLR(uint32_t sctlr) -{ - __set_CP(15, 0, sctlr, 1, 0, 0); -} - -/** \brief Get SCTLR - \return System Control Register value - */ -__STATIC_FORCEINLINE uint32_t __get_SCTLR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 1, 0, 0); - return result; -} - -/** \brief Set ACTRL - \param [in] actrl Auxiliary Control Register value to set - */ -__STATIC_FORCEINLINE void __set_ACTRL(uint32_t actrl) -{ - __set_CP(15, 0, actrl, 1, 0, 1); -} - -/** \brief Get ACTRL - \return Auxiliary Control Register value - */ -__STATIC_FORCEINLINE uint32_t __get_ACTRL(void) -{ - uint32_t result; - __get_CP(15, 0, result, 1, 0, 1); - return result; -} - -/** \brief Get MPIDR - - This function returns the value of the Multiprocessor Affinity Register. - - \return Multiprocessor Affinity Register value - */ -__STATIC_FORCEINLINE uint32_t __get_MPIDR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 0, 0, 5); - return result; -} - -/** \brief Get VBAR - - This function returns the value of the Vector Base Address Register. - - \return Vector Base Address Register - */ -__STATIC_FORCEINLINE uint32_t __get_VBAR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 12, 0, 0); - return result; -} - -/** \brief Set VBAR - - This function assigns the given value to the Vector Base Address Register. - - \param [in] vbar Vector Base Address Register value to set - */ -__STATIC_FORCEINLINE void __set_VBAR(uint32_t vbar) -{ - __set_CP(15, 0, vbar, 12, 0, 0); -} - -/** \brief Get MVBAR - - This function returns the value of the Monitor Vector Base Address Register. - - \return Monitor Vector Base Address Register - */ -__STATIC_FORCEINLINE uint32_t __get_MVBAR(void) -{ - uint32_t result; - __get_CP(15, 0, result, 12, 0, 1); - return result; -} - -/** \brief Set MVBAR - - This function assigns the given value to the Monitor Vector Base Address Register. - - \param [in] mvbar Monitor Vector Base Address Register value to set - */ -__STATIC_FORCEINLINE void __set_MVBAR(uint32_t mvbar) -{ - __set_CP(15, 0, mvbar, 12, 0, 1); -} - -#if (defined(__CORTEX_A) && (__CORTEX_A == 7U) && \ - defined(__TIM_PRESENT) && (__TIM_PRESENT == 1U)) || \ - defined(DOXYGEN) - -/** \brief Set CNTFRQ - - This function assigns the given value to PL1 Physical Timer Counter Frequency Register (CNTFRQ). - - \param [in] value CNTFRQ Register value to set -*/ -__STATIC_FORCEINLINE void __set_CNTFRQ(uint32_t value) -{ - __set_CP(15, 0, value, 14, 0, 0); -} - -/** \brief Get CNTFRQ - - This function returns the value of the PL1 Physical Timer Counter Frequency Register (CNTFRQ). - - \return CNTFRQ Register value - */ -__STATIC_FORCEINLINE uint32_t __get_CNTFRQ(void) -{ - uint32_t result; - __get_CP(15, 0, result, 14, 0 , 0); - return result; -} - -/** \brief Set CNTP_TVAL - - This function assigns the given value to PL1 Physical Timer Value Register (CNTP_TVAL). - - \param [in] value CNTP_TVAL Register value to set -*/ -__STATIC_FORCEINLINE void __set_CNTP_TVAL(uint32_t value) -{ - __set_CP(15, 0, value, 14, 2, 0); -} - -/** \brief Get CNTP_TVAL - - This function returns the value of the PL1 Physical Timer Value Register (CNTP_TVAL). - - \return CNTP_TVAL Register value - */ -__STATIC_FORCEINLINE uint32_t __get_CNTP_TVAL(void) -{ - uint32_t result; - __get_CP(15, 0, result, 14, 2, 0); - return result; -} - -/** \brief Get CNTPCT - - This function returns the value of the 64 bits PL1 Physical Count Register (CNTPCT). - - \return CNTPCT Register value - */ -__STATIC_FORCEINLINE uint64_t __get_CNTPCT(void) -{ - uint64_t result; - __get_CP64(15, 0, result, 14); - return result; -} - -/** \brief Set CNTP_CVAL - - This function assigns the given value to 64bits PL1 Physical Timer CompareValue Register (CNTP_CVAL). - - \param [in] value CNTP_CVAL Register value to set -*/ -__STATIC_FORCEINLINE void __set_CNTP_CVAL(uint64_t value) -{ - __set_CP64(15, 2, value, 14); -} - -/** \brief Get CNTP_CVAL - - This function returns the value of the 64 bits PL1 Physical Timer CompareValue Register (CNTP_CVAL). - - \return CNTP_CVAL Register value - */ -__STATIC_FORCEINLINE uint64_t __get_CNTP_CVAL(void) -{ - uint64_t result; - __get_CP64(15, 2, result, 14); - return result; -} - -/** \brief Set CNTP_CTL - - This function assigns the given value to PL1 Physical Timer Control Register (CNTP_CTL). - - \param [in] value CNTP_CTL Register value to set -*/ -__STATIC_FORCEINLINE void __set_CNTP_CTL(uint32_t value) -{ - __set_CP(15, 0, value, 14, 2, 1); -} - -/** \brief Get CNTP_CTL register - \return CNTP_CTL Register value - */ -__STATIC_FORCEINLINE uint32_t __get_CNTP_CTL(void) -{ - uint32_t result; - __get_CP(15, 0, result, 14, 2, 1); - return result; -} - -#endif - -/** \brief Set TLBIALL - - TLB Invalidate All - */ -__STATIC_FORCEINLINE void __set_TLBIALL(uint32_t value) -{ - __set_CP(15, 0, value, 8, 7, 0); -} - -/** \brief Set BPIALL. - - Branch Predictor Invalidate All - */ -__STATIC_FORCEINLINE void __set_BPIALL(uint32_t value) -{ - __set_CP(15, 0, value, 7, 5, 6); -} - -/** \brief Set ICIALLU - - Instruction Cache Invalidate All - */ -__STATIC_FORCEINLINE void __set_ICIALLU(uint32_t value) -{ - __set_CP(15, 0, value, 7, 5, 0); -} - -/** \brief Set DCCMVAC - - Data cache clean - */ -__STATIC_FORCEINLINE void __set_DCCMVAC(uint32_t value) -{ - __set_CP(15, 0, value, 7, 10, 1); -} - -/** \brief Set DCIMVAC - - Data cache invalidate - */ -__STATIC_FORCEINLINE void __set_DCIMVAC(uint32_t value) -{ - __set_CP(15, 0, value, 7, 6, 1); -} - -/** \brief Set DCCIMVAC - - Data cache clean and invalidate - */ -__STATIC_FORCEINLINE void __set_DCCIMVAC(uint32_t value) -{ - __set_CP(15, 0, value, 7, 14, 1); -} - -/** \brief Set CSSELR - */ -__STATIC_FORCEINLINE void __set_CSSELR(uint32_t value) -{ -// __ASM volatile("MCR p15, 2, %0, c0, c0, 0" : : "r"(value) : "memory"); - __set_CP(15, 2, value, 0, 0, 0); -} - -/** \brief Get CSSELR - \return CSSELR Register value - */ -__STATIC_FORCEINLINE uint32_t __get_CSSELR(void) -{ - uint32_t result; -// __ASM volatile("MRC p15, 2, %0, c0, c0, 0" : "=r"(result) : : "memory"); - __get_CP(15, 2, result, 0, 0, 0); - return result; -} - -/** \brief Set CCSIDR - \deprecated CCSIDR itself is read-only. Use __set_CSSELR to select cache level instead. - */ -CMSIS_DEPRECATED -__STATIC_FORCEINLINE void __set_CCSIDR(uint32_t value) -{ - __set_CSSELR(value); -} - -/** \brief Get CCSIDR - \return CCSIDR Register value - */ -__STATIC_FORCEINLINE uint32_t __get_CCSIDR(void) -{ - uint32_t result; -// __ASM volatile("MRC p15, 1, %0, c0, c0, 0" : "=r"(result) : : "memory"); - __get_CP(15, 1, result, 0, 0, 0); - return result; -} - -/** \brief Get CLIDR - \return CLIDR Register value - */ -__STATIC_FORCEINLINE uint32_t __get_CLIDR(void) -{ - uint32_t result; -// __ASM volatile("MRC p15, 1, %0, c0, c0, 1" : "=r"(result) : : "memory"); - __get_CP(15, 1, result, 0, 0, 1); - return result; -} - -/** \brief Set DCISW - */ -__STATIC_FORCEINLINE void __set_DCISW(uint32_t value) -{ -// __ASM volatile("MCR p15, 0, %0, c7, c6, 2" : : "r"(value) : "memory") - __set_CP(15, 0, value, 7, 6, 2); -} - -/** \brief Set DCCSW - */ -__STATIC_FORCEINLINE void __set_DCCSW(uint32_t value) -{ -// __ASM volatile("MCR p15, 0, %0, c7, c10, 2" : : "r"(value) : "memory") - __set_CP(15, 0, value, 7, 10, 2); -} - -/** \brief Set DCCISW - */ -__STATIC_FORCEINLINE void __set_DCCISW(uint32_t value) -{ -// __ASM volatile("MCR p15, 0, %0, c7, c14, 2" : : "r"(value) : "memory") - __set_CP(15, 0, value, 7, 14, 2); -} - -#endif diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_gcc.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_gcc.h deleted file mode 100644 index 920d612..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_gcc.h +++ /dev/null @@ -1,917 +0,0 @@ -/**************************************************************************//** - * @file cmsis_gcc.h - * @brief CMSIS compiler specific macros, functions, instructions - * @version V1.3.2 - * @date 24. March 2022 - ******************************************************************************/ -/* - * Copyright (c) 2009-2022 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef __CMSIS_GCC_H -#define __CMSIS_GCC_H - -/* ignore some GCC warnings */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wsign-conversion" -#pragma GCC diagnostic ignored "-Wconversion" -#pragma GCC diagnostic ignored "-Wunused-parameter" - -/* Fallback for __has_builtin */ -#ifndef __has_builtin - #define __has_builtin(x) (0) -#endif - -/* CMSIS compiler specific defines */ -#ifndef __ASM - #define __ASM __asm -#endif -#ifndef __INLINE - #define __INLINE inline -#endif -#ifndef __FORCEINLINE - #define __FORCEINLINE __attribute__((always_inline)) -#endif -#ifndef __STATIC_INLINE - #define __STATIC_INLINE static inline -#endif -#ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE __attribute__((always_inline)) static inline -#endif -#ifndef __NO_RETURN - #define __NO_RETURN __attribute__((__noreturn__)) -#endif -#ifndef CMSIS_DEPRECATED - #define CMSIS_DEPRECATED __attribute__((deprecated)) -#endif -#ifndef __USED - #define __USED __attribute__((used)) -#endif -#ifndef __WEAK - #define __WEAK __attribute__((weak)) -#endif -#ifndef __PACKED - #define __PACKED __attribute__((packed, aligned(1))) -#endif -#ifndef __PACKED_STRUCT - #define __PACKED_STRUCT struct __attribute__((packed, aligned(1))) -#endif -#ifndef __UNALIGNED_UINT16_WRITE - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wpacked" -/*lint -esym(9058, T_UINT16_WRITE)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT16_WRITE */ - __PACKED_STRUCT T_UINT16_WRITE { uint16_t v; }; - #pragma GCC diagnostic pop - #define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val)) -#endif -#ifndef __UNALIGNED_UINT16_READ - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wpacked" -/*lint -esym(9058, T_UINT16_READ)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT16_READ */ - __PACKED_STRUCT T_UINT16_READ { uint16_t v; }; - #pragma GCC diagnostic pop - #define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v) -#endif -#ifndef __UNALIGNED_UINT32_WRITE - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wpacked" -/*lint -esym(9058, T_UINT32_WRITE)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT32_WRITE */ - __PACKED_STRUCT T_UINT32_WRITE { uint32_t v; }; - #pragma GCC diagnostic pop - #define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val)) -#endif -#ifndef __UNALIGNED_UINT32_READ - #pragma GCC diagnostic push - #pragma GCC diagnostic ignored "-Wpacked" - __PACKED_STRUCT T_UINT32_READ { uint32_t v; }; - #pragma GCC diagnostic pop - #define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v) -#endif -#ifndef __ALIGNED - #define __ALIGNED(x) __attribute__((aligned(x))) -#endif -#ifndef __COMPILER_BARRIER - #define __COMPILER_BARRIER() __ASM volatile("":::"memory") -#endif - - -__STATIC_FORCEINLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("qsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - - -__STATIC_FORCEINLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("qsub8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - - -__STATIC_FORCEINLINE uint32_t __QADD16(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("qadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __QADD8(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("qadd8 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE int32_t __QADD( int32_t op1, int32_t op2) -{ - int32_t result; - - __ASM volatile ("qadd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __QSAX(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("qsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("shsax %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint64_t __SMLALD (uint32_t op1, uint32_t op2, uint64_t acc) -{ - union llreg_u{ - uint32_t w32[2]; - uint64_t w64; - } llr; - llr.w64 = acc; - -#ifndef __ARMEB__ /* Little endian */ - __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); -#else /* Big endian */ - __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); -#endif - - return(llr.w64); -} - -__STATIC_FORCEINLINE int32_t __QSUB( int32_t op1, int32_t op2) -{ - int32_t result; - - __ASM volatile ("qsub %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SXTB16(uint32_t op1) -{ - uint32_t result; - - __ASM ("sxtb16 %0, %1" : "=r" (result) : "r" (op1)); - return(result); -} - - -__STATIC_FORCEINLINE uint32_t __SMUAD (uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("smuad %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - - - -#define __PKHBT(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \ - ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL) ) - -#define __PKHTB(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \ - ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL) ) - -__STATIC_FORCEINLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3) -{ - uint32_t result; - - __ASM volatile ("smlad %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("smuadx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3) -{ - uint32_t result; - - __ASM volatile ("smladx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); - return(result); -} - -__STATIC_FORCEINLINE uint64_t __SMLALDX (uint32_t op1, uint32_t op2, uint64_t acc) -{ - union llreg_u{ - uint32_t w32[2]; - uint64_t w64; - } llr; - llr.w64 = acc; - -#ifndef __ARMEB__ /* Little endian */ - __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[0]), "=r" (llr.w32[1]): "r" (op1), "r" (op2) , "0" (llr.w32[0]), "1" (llr.w32[1]) ); -#else /* Big endian */ - __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (llr.w32[1]), "=r" (llr.w32[0]): "r" (op1), "r" (op2) , "0" (llr.w32[1]), "1" (llr.w32[0]) ); -#endif - - return(llr.w64); -} - -__STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3) -{ - int32_t result; - - __ASM volatile ("smmla %0, %1, %2, %3" : "=r" (result): "r" (op1), "r" (op2), "r" (op3) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMUSD (uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("smusd %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM volatile ("smusdx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __QASX(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("qasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("shadd16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("shsub16 %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SHASX(uint32_t op1, uint32_t op2) -{ - uint32_t result; - - __ASM ("shasx %0, %1, %2" : "=r" (result) : "r" (op1), "r" (op2) ); - return(result); -} - -__STATIC_FORCEINLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3) -{ - uint32_t result; - - __ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (result) : "r" (op1), "r" (op2), "r" (op3) ); - return(result); -} - - -/* ########################## Core Instruction Access ######################### */ -/** - \brief No Operation - */ -#define __NOP() __ASM volatile ("nop") - -/** - \brief Wait For Interrupt - */ -#define __WFI() __ASM volatile ("wfi":::"memory") - -/** - \brief Wait For Event - */ -#define __WFE() __ASM volatile ("wfe":::"memory") - -/** - \brief Send Event - */ -#define __SEV() __ASM volatile ("sev") - -/** - \brief Instruction Synchronization Barrier - \details Instruction Synchronization Barrier flushes the pipeline in the processor, - so that all instructions following the ISB are fetched from cache or memory, - after the instruction has been completed. - */ -__STATIC_FORCEINLINE void __ISB(void) -{ - __ASM volatile ("isb 0xF":::"memory"); -} - - -/** - \brief Data Synchronization Barrier - \details Acts as a special kind of Data Memory Barrier. - It completes when all explicit memory accesses before this instruction complete. - */ -__STATIC_FORCEINLINE void __DSB(void) -{ - __ASM volatile ("dsb 0xF":::"memory"); -} - -/** - \brief Data Memory Barrier - \details Ensures the apparent order of the explicit memory operations before - and after the instruction, without ensuring their completion. - */ -__STATIC_FORCEINLINE void __DMB(void) -{ - __ASM volatile ("dmb 0xF":::"memory"); -} - -/** - \brief Reverse byte order (32 bit) - \details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412. - \param [in] value Value to reverse - \return Reversed value - */ -__STATIC_FORCEINLINE uint32_t __REV(uint32_t value) -{ -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5) - return __builtin_bswap32(value); -#else - uint32_t result; - - __ASM ("rev %0, %1" : "=r" (result) : "r" (value) ); - return result; -#endif -} - -/** - \brief Reverse byte order (16 bit) - \details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856. - \param [in] value Value to reverse - \return Reversed value - */ -__STATIC_FORCEINLINE uint32_t __REV16(uint32_t value) -{ - uint32_t result; - __ASM ("rev16 %0, %1" : "=r" (result) : "r" (value)); - return result; -} - -/** - \brief Reverse byte order (16 bit) - \details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000. - \param [in] value Value to reverse - \return Reversed value - */ -__STATIC_FORCEINLINE int16_t __REVSH(int16_t value) -{ -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) - return (int16_t)__builtin_bswap16(value); -#else - int16_t result; - - __ASM ("revsh %0, %1" : "=r" (result) : "r" (value) ); - return result; -#endif -} - -/** - \brief Rotate Right in unsigned value (32 bit) - \details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits. - \param [in] op1 Value to rotate - \param [in] op2 Number of Bits to rotate - \return Rotated value - */ -__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2) -{ - op2 %= 32U; - if (op2 == 0U) - { - return op1; - } - return (op1 >> op2) | (op1 << (32U - op2)); -} - - -/** - \brief Breakpoint - \param [in] value is ignored by the processor. - If required, a debugger can use it to store additional information about the breakpoint. - */ -#define __BKPT(value) __ASM volatile ("bkpt "#value) - -/** - \brief Reverse bit order of value - \details Reverses the bit order of the given value. - \param [in] value Value to reverse - \return Reversed value - */ -__STATIC_FORCEINLINE uint32_t __RBIT(uint32_t value) -{ - uint32_t result; - __ASM ("rbit %0, %1" : "=r" (result) : "r" (value) ); - return result; -} - -/** - \brief Count leading zeros - \param [in] value Value to count the leading zeros - \return number of leading zeros in value - */ -__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value) -{ - /* Even though __builtin_clz produces a CLZ instruction on ARM, formally - __builtin_clz(0) is undefined behaviour, so handle this case specially. - This guarantees ARM-compatible results if happening to compile on a non-ARM - target, and ensures the compiler doesn't decide to activate any - optimisations using the logic "value was passed to __builtin_clz, so it - is non-zero". - ARM GCC 7.3 and possibly earlier will optimise this test away, leaving a - single CLZ instruction. - */ - if (value == 0U) - { - return 32U; - } - return __builtin_clz(value); -} - -/** - \brief LDR Exclusive (8 bit) - \details Executes a exclusive LDR instruction for 8 bit value. - \param [in] ptr Pointer to data - \return value of type uint8_t at (*ptr) - */ -__STATIC_FORCEINLINE uint8_t __LDREXB(volatile uint8_t *addr) -{ - uint32_t result; - -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) - __ASM volatile ("ldrexb %0, %1" : "=r" (result) : "Q" (*addr) ); -#else - /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not - accepted by assembler. So has to use following less efficient pattern. - */ - __ASM volatile ("ldrexb %0, [%1]" : "=r" (result) : "r" (addr) : "memory" ); -#endif - return ((uint8_t) result); /* Add explicit type cast here */ -} - - -/** - \brief LDR Exclusive (16 bit) - \details Executes a exclusive LDR instruction for 16 bit values. - \param [in] ptr Pointer to data - \return value of type uint16_t at (*ptr) - */ -__STATIC_FORCEINLINE uint16_t __LDREXH(volatile uint16_t *addr) -{ - uint32_t result; - -#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) - __ASM volatile ("ldrexh %0, %1" : "=r" (result) : "Q" (*addr) ); -#else - /* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not - accepted by assembler. So has to use following less efficient pattern. - */ - __ASM volatile ("ldrexh %0, [%1]" : "=r" (result) : "r" (addr) : "memory" ); -#endif - return ((uint16_t) result); /* Add explicit type cast here */ -} - - -/** - \brief LDR Exclusive (32 bit) - \details Executes a exclusive LDR instruction for 32 bit values. - \param [in] ptr Pointer to data - \return value of type uint32_t at (*ptr) - */ -__STATIC_FORCEINLINE uint32_t __LDREXW(volatile uint32_t *addr) -{ - uint32_t result; - - __ASM volatile ("ldrex %0, %1" : "=r" (result) : "Q" (*addr) ); - return(result); -} - - -/** - \brief STR Exclusive (8 bit) - \details Executes a exclusive STR instruction for 8 bit values. - \param [in] value Value to store - \param [in] ptr Pointer to location - \return 0 Function succeeded - \return 1 Function failed - */ -__STATIC_FORCEINLINE uint32_t __STREXB(uint8_t value, volatile uint8_t *addr) -{ - uint32_t result; - - __ASM volatile ("strexb %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) ); - return(result); -} - - -/** - \brief STR Exclusive (16 bit) - \details Executes a exclusive STR instruction for 16 bit values. - \param [in] value Value to store - \param [in] ptr Pointer to location - \return 0 Function succeeded - \return 1 Function failed - */ -__STATIC_FORCEINLINE uint32_t __STREXH(uint16_t value, volatile uint16_t *addr) -{ - uint32_t result; - - __ASM volatile ("strexh %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) ); - return(result); -} - - -/** - \brief STR Exclusive (32 bit) - \details Executes a exclusive STR instruction for 32 bit values. - \param [in] value Value to store - \param [in] ptr Pointer to location - \return 0 Function succeeded - \return 1 Function failed - */ -__STATIC_FORCEINLINE uint32_t __STREXW(uint32_t value, volatile uint32_t *addr) -{ - uint32_t result; - - __ASM volatile ("strex %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" (value) ); - return(result); -} - - -/** - \brief Remove the exclusive lock - \details Removes the exclusive lock which is created by LDREX. - */ -__STATIC_FORCEINLINE void __CLREX(void) -{ - __ASM volatile ("clrex" ::: "memory"); -} - -/** - \brief Signed Saturate - \details Saturates a signed value. - \param [in] value Value to be saturated - \param [in] sat Bit position to saturate to (1..32) - \return Saturated value - */ -#define __SSAT(ARG1, ARG2) \ -__extension__ \ -({ \ - int32_t __RES, __ARG1 = (ARG1); \ - __ASM volatile ("ssat %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) : "cc" ); \ - __RES; \ - }) - - -/** - \brief Unsigned Saturate - \details Saturates an unsigned value. - \param [in] value Value to be saturated - \param [in] sat Bit position to saturate to (0..31) - \return Saturated value - */ -#define __USAT(ARG1, ARG2) \ -__extension__ \ -({ \ - uint32_t __RES, __ARG1 = (ARG1); \ - __ASM volatile ("usat %0, %1, %2" : "=r" (__RES) : "I" (ARG2), "r" (__ARG1) : "cc" ); \ - __RES; \ - }) - -/* ########################### Core Function Access ########################### */ - -/** - \brief Enable IRQ Interrupts - \details Enables IRQ interrupts by clearing the I-bit in the CPSR. - Can only be executed in Privileged modes. - */ -__STATIC_FORCEINLINE void __enable_irq(void) -{ - __ASM volatile ("cpsie i" : : : "memory"); -} - -/** - \brief Disable IRQ Interrupts - \details Disables IRQ interrupts by setting the I-bit in the CPSR. - Can only be executed in Privileged modes. - */ -__STATIC_FORCEINLINE void __disable_irq(void) -{ - __ASM volatile ("cpsid i" : : : "memory"); -} - -/** - \brief Enable FIQ - \details Enables FIQ interrupts by clearing the F-bit in the CPSR. - Can only be executed in Privileged modes. - */ -__STATIC_FORCEINLINE void __enable_fault_irq(void) -{ - __ASM volatile ("cpsie f" : : : "memory"); -} - -/** - \brief Disable FIQ - \details Disables FIQ interrupts by setting the F-bit in the CPSR. - Can only be executed in Privileged modes. - */ -__STATIC_FORCEINLINE void __disable_fault_irq(void) -{ - __ASM volatile ("cpsid f" : : : "memory"); -} - -/** - \brief Get FPSCR - \details Returns the current value of the Floating Point Status/Control register. - \return Floating Point Status/Control register value - */ -__STATIC_FORCEINLINE uint32_t __get_FPSCR(void) -{ - #if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \ - (defined (__FPU_USED ) && (__FPU_USED == 1U)) ) - #if __has_builtin(__builtin_arm_get_fpscr) - // Re-enable using built-in when GCC has been fixed - // || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2) - /* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */ - return __builtin_arm_get_fpscr(); - #else - uint32_t result; - - __ASM volatile ("VMRS %0, fpscr" : "=r" (result) ); - return(result); - #endif - #else - return(0U); - #endif -} - -/** - \brief Set FPSCR - \details Assigns the given value to the Floating Point Status/Control register. - \param [in] fpscr Floating Point Status/Control value to set - */ -__STATIC_FORCEINLINE void __set_FPSCR(uint32_t fpscr) -{ - #if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \ - (defined (__FPU_USED ) && (__FPU_USED == 1U)) ) - #if __has_builtin(__builtin_arm_set_fpscr) - // Re-enable using built-in when GCC has been fixed - // || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2) - /* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */ - __builtin_arm_set_fpscr(fpscr); - #else - __ASM volatile ("VMSR fpscr, %0" : : "r" (fpscr) : "vfpcc", "memory"); - #endif - #else - (void)fpscr; - #endif -} - -/** \brief Get CPSR Register - \return CPSR Register value - */ -__STATIC_FORCEINLINE uint32_t __get_CPSR(void) -{ - uint32_t result; - __ASM volatile("MRS %0, cpsr" : "=r" (result) ); - return(result); -} - -/** \brief Set CPSR Register - \param [in] cpsr CPSR value to set - */ -__STATIC_FORCEINLINE void __set_CPSR(uint32_t cpsr) -{ - __ASM volatile ("MSR cpsr, %0" : : "r" (cpsr) : "cc", "memory"); -} - -/** \brief Get Mode - \return Processor Mode - */ -__STATIC_FORCEINLINE uint32_t __get_mode(void) -{ - return (__get_CPSR() & 0x1FU); -} - -/** \brief Set Mode - \param [in] mode Mode value to set - */ -__STATIC_FORCEINLINE void __set_mode(uint32_t mode) -{ - __ASM volatile("MSR cpsr_c, %0" : : "r" (mode) : "memory"); -} - -/** \brief Get Stack Pointer - \return Stack Pointer value - */ -__STATIC_FORCEINLINE uint32_t __get_SP(void) -{ - uint32_t result; - __ASM volatile("MOV %0, sp" : "=r" (result) : : "memory"); - return result; -} - -/** \brief Set Stack Pointer - \param [in] stack Stack Pointer value to set - */ -__STATIC_FORCEINLINE void __set_SP(uint32_t stack) -{ - __ASM volatile("MOV sp, %0" : : "r" (stack) : "memory"); -} - -/** \brief Get USR/SYS Stack Pointer - \return USR/SYS Stack Pointer value - */ -__STATIC_FORCEINLINE uint32_t __get_SP_usr(void) -{ - uint32_t cpsr = __get_CPSR(); - uint32_t result; - __ASM volatile( - "CPS #0x1F \n" - "MOV %0, sp " : "=r"(result) : : "memory" - ); - __set_CPSR(cpsr); - __ISB(); - return result; -} - -/** \brief Set USR/SYS Stack Pointer - \param [in] topOfProcStack USR/SYS Stack Pointer value to set - */ -__STATIC_FORCEINLINE void __set_SP_usr(uint32_t topOfProcStack) -{ - uint32_t cpsr = __get_CPSR(); - __ASM volatile( - "CPS #0x1F \n" - "MOV sp, %0 " : : "r" (topOfProcStack) : "memory" - ); - __set_CPSR(cpsr); - __ISB(); -} - -/** \brief Get FPEXC - \return Floating Point Exception Control register value - */ -__STATIC_FORCEINLINE uint32_t __get_FPEXC(void) -{ -#if (__FPU_PRESENT == 1) - uint32_t result; - __ASM volatile("VMRS %0, fpexc" : "=r" (result) : : "memory"); - return(result); -#else - return(0); -#endif -} - -/** \brief Set FPEXC - \param [in] fpexc Floating Point Exception Control value to set - */ -__STATIC_FORCEINLINE void __set_FPEXC(uint32_t fpexc) -{ -#if (__FPU_PRESENT == 1) - __ASM volatile ("VMSR fpexc, %0" : : "r" (fpexc) : "memory"); -#endif -} - -/* - * Include common core functions to access Coprocessor 15 registers - */ - -#define __get_CP(cp, op1, Rt, CRn, CRm, op2) __ASM volatile("MRC p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : "=r" (Rt) : : "memory" ) -#define __set_CP(cp, op1, Rt, CRn, CRm, op2) __ASM volatile("MCR p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : : "r" (Rt) : "memory" ) -#define __get_CP64(cp, op1, Rt, CRm) __ASM volatile("MRRC p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : "=r" (Rt) : : "memory" ) -#define __set_CP64(cp, op1, Rt, CRm) __ASM volatile("MCRR p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : : "r" (Rt) : "memory" ) - -#include "cmsis_cp15.h" - -/** \brief Enable Floating Point Unit - - Critical section, called from undef handler, so systick is disabled - */ -__STATIC_INLINE void __FPU_Enable(void) -{ - __ASM volatile( - //Permit access to VFP/NEON, registers by modifying CPACR - " MRC p15,0,R1,c1,c0,2 \n" - " ORR R1,R1,#0x00F00000 \n" - " MCR p15,0,R1,c1,c0,2 \n" - - //Ensure that subsequent instructions occur in the context of VFP/NEON access permitted - " ISB \n" - - //Enable VFP/NEON - " VMRS R1,FPEXC \n" - " ORR R1,R1,#0x40000000 \n" - " VMSR FPEXC,R1 \n" - - //Initialise VFP/NEON registers to 0 - " MOV R2,#0 \n" - - //Initialise D16 registers to 0 - " VMOV D0, R2,R2 \n" - " VMOV D1, R2,R2 \n" - " VMOV D2, R2,R2 \n" - " VMOV D3, R2,R2 \n" - " VMOV D4, R2,R2 \n" - " VMOV D5, R2,R2 \n" - " VMOV D6, R2,R2 \n" - " VMOV D7, R2,R2 \n" - " VMOV D8, R2,R2 \n" - " VMOV D9, R2,R2 \n" - " VMOV D10,R2,R2 \n" - " VMOV D11,R2,R2 \n" - " VMOV D12,R2,R2 \n" - " VMOV D13,R2,R2 \n" - " VMOV D14,R2,R2 \n" - " VMOV D15,R2,R2 \n" - -#if (defined(__ARM_NEON) && (__ARM_NEON == 1)) - //Initialise D32 registers to 0 - " VMOV D16,R2,R2 \n" - " VMOV D17,R2,R2 \n" - " VMOV D18,R2,R2 \n" - " VMOV D19,R2,R2 \n" - " VMOV D20,R2,R2 \n" - " VMOV D21,R2,R2 \n" - " VMOV D22,R2,R2 \n" - " VMOV D23,R2,R2 \n" - " VMOV D24,R2,R2 \n" - " VMOV D25,R2,R2 \n" - " VMOV D26,R2,R2 \n" - " VMOV D27,R2,R2 \n" - " VMOV D28,R2,R2 \n" - " VMOV D29,R2,R2 \n" - " VMOV D30,R2,R2 \n" - " VMOV D31,R2,R2 \n" -#endif - - //Initialise FPSCR to a known state - " VMRS R1,FPSCR \n" - " LDR R2,=0x00086060 \n" //Mask off all bits that do not have to be preserved. Non-preserved bits can/should be zero. - " AND R1,R1,R2 \n" - " VMSR FPSCR,R1 " - : : : "cc", "r1", "r2" - ); -} - -#pragma GCC diagnostic pop - -#endif /* __CMSIS_GCC_H */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_iccarm.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_iccarm.h deleted file mode 100644 index 08aa224..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/cmsis_iccarm.h +++ /dev/null @@ -1,573 +0,0 @@ -/**************************************************************************//** - * @file cmsis_iccarm.h - * @brief CMSIS compiler ICCARM (IAR Compiler for Arm) header file - * @version V5.0.7 - * @date 15. May 2019 - ******************************************************************************/ - -//------------------------------------------------------------------------------ -// -// Copyright (c) 2017-2018 IAR Systems -// Copyright (c) 2018-2019 Arm Limited -// -// SPDX-License-Identifier: Apache-2.0 -// -// Licensed under the Apache License, Version 2.0 (the "License") -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -//------------------------------------------------------------------------------ - - -#ifndef __CMSIS_ICCARM_H__ -#define __CMSIS_ICCARM_H__ - -#ifndef __ICCARM__ - #error This file should only be compiled by ICCARM -#endif - -#pragma system_include - -#define __IAR_FT _Pragma("inline=forced") __intrinsic - -#if (__VER__ >= 8000000) - #define __ICCARM_V8 1 -#else - #define __ICCARM_V8 0 -#endif - -#pragma language=extended - -#ifndef __ALIGNED - #if __ICCARM_V8 - #define __ALIGNED(x) __attribute__((aligned(x))) - #elif (__VER__ >= 7080000) - /* Needs IAR language extensions */ - #define __ALIGNED(x) __attribute__((aligned(x))) - #else - #warning No compiler specific solution for __ALIGNED.__ALIGNED is ignored. - #define __ALIGNED(x) - #endif -#endif - - -/* Define compiler macros for CPU architecture, used in CMSIS 5. - */ -#if __ARM_ARCH_7A__ -/* Macro already defined */ -#else - #if defined(__ARM7A__) - #define __ARM_ARCH_7A__ 1 - #endif -#endif - -#ifndef __ASM - #define __ASM __asm -#endif - -#ifndef __COMPILER_BARRIER - #define __COMPILER_BARRIER() __ASM volatile("":::"memory") -#endif - -#ifndef __INLINE - #define __INLINE inline -#endif - -#ifndef __NO_RETURN - #if __ICCARM_V8 - #define __NO_RETURN __attribute__((__noreturn__)) - #else - #define __NO_RETURN _Pragma("object_attribute=__noreturn") - #endif -#endif - -#ifndef __PACKED - /* Needs IAR language extensions */ - #if __ICCARM_V8 - #define __PACKED __attribute__((packed, aligned(1))) - #else - #define __PACKED __packed - #endif -#endif - -#ifndef __PACKED_STRUCT - /* Needs IAR language extensions */ - #if __ICCARM_V8 - #define __PACKED_STRUCT struct __attribute__((packed, aligned(1))) - #else - #define __PACKED_STRUCT __packed struct - #endif -#endif - -#ifndef __PACKED_UNION - /* Needs IAR language extensions */ - #if __ICCARM_V8 - #define __PACKED_UNION union __attribute__((packed, aligned(1))) - #else - #define __PACKED_UNION __packed union - #endif -#endif - -#ifndef __RESTRICT - #if __ICCARM_V8 - #define __RESTRICT __restrict - #else - /* Needs IAR language extensions */ - #define __RESTRICT restrict - #endif -#endif - -#ifndef __STATIC_INLINE - #define __STATIC_INLINE static inline -#endif - -#ifndef __FORCEINLINE - #define __FORCEINLINE _Pragma("inline=forced") -#endif - -#ifndef __STATIC_FORCEINLINE - #define __STATIC_FORCEINLINE __FORCEINLINE __STATIC_INLINE -#endif - -#ifndef CMSIS_DEPRECATED - #define CMSIS_DEPRECATED __attribute__((deprecated)) -#endif - -#ifndef __UNALIGNED_UINT16_READ - #pragma language=save - #pragma language=extended - __IAR_FT uint16_t __iar_uint16_read(void const *ptr) - { - return *(__packed uint16_t*)(ptr); - } - #pragma language=restore - #define __UNALIGNED_UINT16_READ(PTR) __iar_uint16_read(PTR) -#endif - - -#ifndef __UNALIGNED_UINT16_WRITE - #pragma language=save - #pragma language=extended - __IAR_FT void __iar_uint16_write(void const *ptr, uint16_t val) - { - *(__packed uint16_t*)(ptr) = val;; - } - #pragma language=restore - #define __UNALIGNED_UINT16_WRITE(PTR,VAL) __iar_uint16_write(PTR,VAL) -#endif - -#ifndef __UNALIGNED_UINT32_READ - #pragma language=save - #pragma language=extended - __IAR_FT uint32_t __iar_uint32_read(void const *ptr) - { - return *(__packed uint32_t*)(ptr); - } - #pragma language=restore - #define __UNALIGNED_UINT32_READ(PTR) __iar_uint32_read(PTR) -#endif - -#ifndef __UNALIGNED_UINT32_WRITE - #pragma language=save - #pragma language=extended - __IAR_FT void __iar_uint32_write(void const *ptr, uint32_t val) - { - *(__packed uint32_t*)(ptr) = val;; - } - #pragma language=restore - #define __UNALIGNED_UINT32_WRITE(PTR,VAL) __iar_uint32_write(PTR,VAL) -#endif - -#if 0 -#ifndef __UNALIGNED_UINT32 /* deprecated */ - #pragma language=save - #pragma language=extended - __packed struct __iar_u32 { uint32_t v; }; - #pragma language=restore - #define __UNALIGNED_UINT32(PTR) (((struct __iar_u32 *)(PTR))->v) -#endif -#endif - -#ifndef __USED - #if __ICCARM_V8 - #define __USED __attribute__((used)) - #else - #define __USED _Pragma("__root") - #endif -#endif - -#ifndef __WEAK - #if __ICCARM_V8 - #define __WEAK __attribute__((weak)) - #else - #define __WEAK _Pragma("__weak") - #endif -#endif - - -#ifndef __ICCARM_INTRINSICS_VERSION__ - #define __ICCARM_INTRINSICS_VERSION__ 0 -#endif - -#if __ICCARM_INTRINSICS_VERSION__ == 2 - - #if defined(__CLZ) - #undef __CLZ - #endif - #if defined(__REVSH) - #undef __REVSH - #endif - #if defined(__RBIT) - #undef __RBIT - #endif - #if defined(__SSAT) - #undef __SSAT - #endif - #if defined(__USAT) - #undef __USAT - #endif - - #include "iccarm_builtin.h" - - #define __enable_irq __iar_builtin_enable_interrupt - #define __disable_irq __iar_builtin_disable_interrupt - #define __enable_fault_irq __iar_builtin_enable_fiq - #define __disable_fault_irq __iar_builtin_disable_fiq - #define __arm_rsr __iar_builtin_rsr - #define __arm_wsr __iar_builtin_wsr - - #if __FPU_PRESENT - #define __get_FPSCR() (__arm_rsr("FPSCR")) - #else - #define __get_FPSCR() ( 0 ) - #endif - - #define __set_FPSCR(VALUE) (__arm_wsr("FPSCR", VALUE)) - - #define __get_CPSR() (__arm_rsr("CPSR")) - #define __get_mode() (__get_CPSR() & 0x1FU) - - #define __set_CPSR(VALUE) (__arm_wsr("CPSR", (VALUE))) - #define __set_mode(VALUE) (__arm_wsr("CPSR_c", (VALUE))) - - - #define __get_FPEXC() (__arm_rsr("FPEXC")) - #define __set_FPEXC(VALUE) (__arm_wsr("FPEXC", VALUE)) - - #define __get_CP(cp, op1, RT, CRn, CRm, op2) \ - ((RT) = __arm_rsr("p" # cp ":" # op1 ":c" # CRn ":c" # CRm ":" # op2)) - - #define __set_CP(cp, op1, RT, CRn, CRm, op2) \ - (__arm_wsr("p" # cp ":" # op1 ":c" # CRn ":c" # CRm ":" # op2, (RT))) - - #define __get_CP64(cp, op1, Rt, CRm) \ - __ASM volatile("MRRC p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : "=r" (Rt) : : "memory" ) - - #define __set_CP64(cp, op1, Rt, CRm) \ - __ASM volatile("MCRR p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : : "r" (Rt) : "memory" ) - - #include "cmsis_cp15.h" - - #define __NOP __iar_builtin_no_operation - - #define __CLZ __iar_builtin_CLZ - #define __CLREX __iar_builtin_CLREX - - #define __DMB __iar_builtin_DMB - #define __DSB __iar_builtin_DSB - #define __ISB __iar_builtin_ISB - - #define __LDREXB __iar_builtin_LDREXB - #define __LDREXH __iar_builtin_LDREXH - #define __LDREXW __iar_builtin_LDREX - - #define __RBIT __iar_builtin_RBIT - #define __REV __iar_builtin_REV - #define __REV16 __iar_builtin_REV16 - - __IAR_FT int16_t __REVSH(int16_t val) - { - return (int16_t) __iar_builtin_REVSH(val); - } - - #define __ROR __iar_builtin_ROR - #define __RRX __iar_builtin_RRX - - #define __SEV __iar_builtin_SEV - - #define __SSAT __iar_builtin_SSAT - - #define __STREXB __iar_builtin_STREXB - #define __STREXH __iar_builtin_STREXH - #define __STREXW __iar_builtin_STREX - - #define __USAT __iar_builtin_USAT - - #define __WFE __iar_builtin_WFE - #define __WFI __iar_builtin_WFI - - #define __SADD8 __iar_builtin_SADD8 - #define __QADD8 __iar_builtin_QADD8 - #define __SHADD8 __iar_builtin_SHADD8 - #define __UADD8 __iar_builtin_UADD8 - #define __UQADD8 __iar_builtin_UQADD8 - #define __UHADD8 __iar_builtin_UHADD8 - #define __SSUB8 __iar_builtin_SSUB8 - #define __QSUB8 __iar_builtin_QSUB8 - #define __SHSUB8 __iar_builtin_SHSUB8 - #define __USUB8 __iar_builtin_USUB8 - #define __UQSUB8 __iar_builtin_UQSUB8 - #define __UHSUB8 __iar_builtin_UHSUB8 - #define __SADD16 __iar_builtin_SADD16 - #define __QADD16 __iar_builtin_QADD16 - #define __SHADD16 __iar_builtin_SHADD16 - #define __UADD16 __iar_builtin_UADD16 - #define __UQADD16 __iar_builtin_UQADD16 - #define __UHADD16 __iar_builtin_UHADD16 - #define __SSUB16 __iar_builtin_SSUB16 - #define __QSUB16 __iar_builtin_QSUB16 - #define __SHSUB16 __iar_builtin_SHSUB16 - #define __USUB16 __iar_builtin_USUB16 - #define __UQSUB16 __iar_builtin_UQSUB16 - #define __UHSUB16 __iar_builtin_UHSUB16 - #define __SASX __iar_builtin_SASX - #define __QASX __iar_builtin_QASX - #define __SHASX __iar_builtin_SHASX - #define __UASX __iar_builtin_UASX - #define __UQASX __iar_builtin_UQASX - #define __UHASX __iar_builtin_UHASX - #define __SSAX __iar_builtin_SSAX - #define __QSAX __iar_builtin_QSAX - #define __SHSAX __iar_builtin_SHSAX - #define __USAX __iar_builtin_USAX - #define __UQSAX __iar_builtin_UQSAX - #define __UHSAX __iar_builtin_UHSAX - #define __USAD8 __iar_builtin_USAD8 - #define __USADA8 __iar_builtin_USADA8 - #define __SSAT16 __iar_builtin_SSAT16 - #define __USAT16 __iar_builtin_USAT16 - #define __UXTB16 __iar_builtin_UXTB16 - #define __UXTAB16 __iar_builtin_UXTAB16 - #define __SXTB16 __iar_builtin_SXTB16 - #define __SXTAB16 __iar_builtin_SXTAB16 - #define __SMUAD __iar_builtin_SMUAD - #define __SMUADX __iar_builtin_SMUADX - #define __SMMLA __iar_builtin_SMMLA - #define __SMLAD __iar_builtin_SMLAD - #define __SMLADX __iar_builtin_SMLADX - #define __SMLALD __iar_builtin_SMLALD - #define __SMLALDX __iar_builtin_SMLALDX - #define __SMUSD __iar_builtin_SMUSD - #define __SMUSDX __iar_builtin_SMUSDX - #define __SMLSD __iar_builtin_SMLSD - #define __SMLSDX __iar_builtin_SMLSDX - #define __SMLSLD __iar_builtin_SMLSLD - #define __SMLSLDX __iar_builtin_SMLSLDX - #define __SEL __iar_builtin_SEL - #define __QADD __iar_builtin_QADD - #define __QSUB __iar_builtin_QSUB - #define __PKHBT __iar_builtin_PKHBT - #define __PKHTB __iar_builtin_PKHTB - -#else /* __ICCARM_INTRINSICS_VERSION__ == 2 */ - - #if !__FPU_PRESENT - #define __get_FPSCR __cmsis_iar_get_FPSR_not_active - #endif - - #ifdef __INTRINSICS_INCLUDED - #error intrinsics.h is already included previously! - #endif - - #include - - #if !__FPU_PRESENT - #define __get_FPSCR() (0) - #endif - - #pragma diag_suppress=Pe940 - #pragma diag_suppress=Pe177 - - #define __enable_irq __enable_interrupt - #define __disable_irq __disable_interrupt - #define __enable_fault_irq __enable_fiq - #define __disable_fault_irq __disable_fiq - #define __NOP __no_operation - - #define __get_xPSR __get_PSR - - __IAR_FT void __set_mode(uint32_t mode) - { - __ASM volatile("MSR cpsr_c, %0" : : "r" (mode) : "memory"); - } - - __IAR_FT uint32_t __LDREXW(uint32_t volatile *ptr) - { - return __LDREX((unsigned long *)ptr); - } - - __IAR_FT uint32_t __STREXW(uint32_t value, uint32_t volatile *ptr) - { - return __STREX(value, (unsigned long *)ptr); - } - - - __IAR_FT uint32_t __RRX(uint32_t value) - { - uint32_t result; - __ASM("RRX %0, %1" : "=r"(result) : "r" (value) : "cc"); - return(result); - } - - - __IAR_FT uint32_t __ROR(uint32_t op1, uint32_t op2) - { - return (op1 >> op2) | (op1 << ((sizeof(op1)*8)-op2)); - } - - __IAR_FT uint32_t __get_FPEXC(void) - { - #if (__FPU_PRESENT == 1) - uint32_t result; - __ASM volatile("VMRS %0, fpexc" : "=r" (result) : : "memory"); - return(result); - #else - return(0); - #endif - } - - __IAR_FT void __set_FPEXC(uint32_t fpexc) - { - #if (__FPU_PRESENT == 1) - __ASM volatile ("VMSR fpexc, %0" : : "r" (fpexc) : "memory"); - #endif - } - - - #define __get_CP(cp, op1, Rt, CRn, CRm, op2) \ - __ASM volatile("MRC p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : "=r" (Rt) : : "memory" ) - #define __set_CP(cp, op1, Rt, CRn, CRm, op2) \ - __ASM volatile("MCR p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : : "r" (Rt) : "memory" ) - #define __get_CP64(cp, op1, Rt, CRm) \ - __ASM volatile("MRRC p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : "=r" (Rt) : : "memory" ) - #define __set_CP64(cp, op1, Rt, CRm) \ - __ASM volatile("MCRR p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : : "r" (Rt) : "memory" ) - - #include "cmsis_cp15.h" - -#endif /* __ICCARM_INTRINSICS_VERSION__ == 2 */ - -#define __BKPT(value) __asm volatile ("BKPT %0" : : "i"(value)) - - -__IAR_FT uint32_t __get_SP_usr(void) -{ - uint32_t cpsr; - uint32_t result; - __ASM volatile( - "MRS %0, cpsr \n" - "CPS #0x1F \n" // no effect in USR mode - "MOV %1, sp \n" - "MSR cpsr_c, %2 \n" // no effect in USR mode - "ISB" : "=r"(cpsr), "=r"(result) : "r"(cpsr) : "memory" - ); - return result; -} - -__IAR_FT void __set_SP_usr(uint32_t topOfProcStack) -{ - uint32_t cpsr; - __ASM volatile( - "MRS %0, cpsr \n" - "CPS #0x1F \n" // no effect in USR mode - "MOV sp, %1 \n" - "MSR cpsr_c, %2 \n" // no effect in USR mode - "ISB" : "=r"(cpsr) : "r" (topOfProcStack), "r"(cpsr) : "memory" - ); -} - -#define __get_mode() (__get_CPSR() & 0x1FU) - -__STATIC_INLINE -void __FPU_Enable(void) -{ - __ASM volatile( - //Permit access to VFP/NEON, registers by modifying CPACR - " MRC p15,0,R1,c1,c0,2 \n" - " ORR R1,R1,#0x00F00000 \n" - " MCR p15,0,R1,c1,c0,2 \n" - - //Ensure that subsequent instructions occur in the context of VFP/NEON access permitted - " ISB \n" - - //Enable VFP/NEON - " VMRS R1,FPEXC \n" - " ORR R1,R1,#0x40000000 \n" - " VMSR FPEXC,R1 \n" - - //Initialise VFP/NEON registers to 0 - " MOV R2,#0 \n" - - //Initialise D16 registers to 0 - " VMOV D0, R2,R2 \n" - " VMOV D1, R2,R2 \n" - " VMOV D2, R2,R2 \n" - " VMOV D3, R2,R2 \n" - " VMOV D4, R2,R2 \n" - " VMOV D5, R2,R2 \n" - " VMOV D6, R2,R2 \n" - " VMOV D7, R2,R2 \n" - " VMOV D8, R2,R2 \n" - " VMOV D9, R2,R2 \n" - " VMOV D10,R2,R2 \n" - " VMOV D11,R2,R2 \n" - " VMOV D12,R2,R2 \n" - " VMOV D13,R2,R2 \n" - " VMOV D14,R2,R2 \n" - " VMOV D15,R2,R2 \n" - -#ifdef __ARM_ADVANCED_SIMD__ - //Initialise D32 registers to 0 - " VMOV D16,R2,R2 \n" - " VMOV D17,R2,R2 \n" - " VMOV D18,R2,R2 \n" - " VMOV D19,R2,R2 \n" - " VMOV D20,R2,R2 \n" - " VMOV D21,R2,R2 \n" - " VMOV D22,R2,R2 \n" - " VMOV D23,R2,R2 \n" - " VMOV D24,R2,R2 \n" - " VMOV D25,R2,R2 \n" - " VMOV D26,R2,R2 \n" - " VMOV D27,R2,R2 \n" - " VMOV D28,R2,R2 \n" - " VMOV D29,R2,R2 \n" - " VMOV D30,R2,R2 \n" - " VMOV D31,R2,R2 \n" -#endif - - //Initialise FPSCR to a known state - " VMRS R1,FPSCR \n" - " MOV32 R2,#0x00086060 \n" //Mask off all bits that do not have to be preserved. Non-preserved bits can/should be zero. - " AND R1,R1,R2 \n" - " VMSR FPSCR,R1 \n" - : : : "cc", "r1", "r2" - ); -} - - - -#undef __IAR_FT -#undef __ICCARM_V8 - -#pragma diag_default=Pe940 -#pragma diag_default=Pe177 - -#endif /* __CMSIS_ICCARM_H__ */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/core_ca.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/core_ca.h deleted file mode 100644 index c4f9269..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/core_ca.h +++ /dev/null @@ -1,2614 +0,0 @@ -/**************************************************************************//** - * @file core_ca.h - * @brief CMSIS Cortex-A Core Peripheral Access Layer Header File - * @version V1.0.3 - * @date 28. January 2020 - ******************************************************************************/ -/* - * Copyright (c) 2009-2020 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if defined ( __ICCARM__ ) - #pragma system_include /* treat file as system include file for MISRA check */ -#elif defined (__clang__) - #pragma clang system_header /* treat file as system include file */ -#endif - -#ifndef __CORE_CA_H_GENERIC -#define __CORE_CA_H_GENERIC - -#ifdef __cplusplus - extern "C" { -#endif - -/******************************************************************************* - * CMSIS definitions - ******************************************************************************/ - -/* CMSIS CA definitions */ -#define __CA_CMSIS_VERSION_MAIN (1U) /*!< \brief [31:16] CMSIS-Core(A) main version */ -#define __CA_CMSIS_VERSION_SUB (1U) /*!< \brief [15:0] CMSIS-Core(A) sub version */ -#define __CA_CMSIS_VERSION ((__CA_CMSIS_VERSION_MAIN << 16U) | \ - __CA_CMSIS_VERSION_SUB ) /*!< \brief CMSIS-Core(A) version number */ - -#if defined ( __CC_ARM ) - #if defined __TARGET_FPU_VFP - #if (__FPU_PRESENT == 1) - #define __FPU_USED 1U - #else - #warning "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)" - #define __FPU_USED 0U - #endif - #else - #define __FPU_USED 0U - #endif - -#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) - #if defined __ARM_FP - #if defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U) - #define __FPU_USED 1U - #else - #warning "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)" - #define __FPU_USED 0U - #endif - #else - #define __FPU_USED 0U - #endif - -#elif defined ( __ICCARM__ ) - #if defined __ARMVFP__ - #if (__FPU_PRESENT == 1) - #define __FPU_USED 1U - #else - #warning "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)" - #define __FPU_USED 0U - #endif - #else - #define __FPU_USED 0U - #endif - -#elif defined ( __TMS470__ ) - #if defined __TI_VFP_SUPPORT__ - #if (__FPU_PRESENT == 1) - #define __FPU_USED 1U - #else - #warning "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)" - #define __FPU_USED 0U - #endif - #else - #define __FPU_USED 0U - #endif - -#elif defined ( __GNUC__ ) - #if defined (__VFP_FP__) && !defined(__SOFTFP__) - #if (__FPU_PRESENT == 1) - #define __FPU_USED 1U - #else - #warning "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)" - #define __FPU_USED 0U - #endif - #else - #define __FPU_USED 0U - #endif - -#elif defined ( __TASKING__ ) - #if defined __FPU_VFP__ - #if (__FPU_PRESENT == 1) - #define __FPU_USED 1U - #else - #error "Compiler generates FPU instructions for a device without an FPU (check __FPU_PRESENT)" - #define __FPU_USED 0U - #endif - #else - #define __FPU_USED 0U - #endif -#endif - -#include "cmsis_compiler.h" /* CMSIS compiler specific defines */ - -#ifdef __cplusplus -} -#endif - -#endif /* __CORE_CA_H_GENERIC */ - -#ifndef __CMSIS_GENERIC - -#ifndef __CORE_CA_H_DEPENDANT -#define __CORE_CA_H_DEPENDANT - -#ifdef __cplusplus - extern "C" { -#endif - - /* check device defines and use defaults */ -#if defined __CHECK_DEVICE_DEFINES - #ifndef __CA_REV - #define __CA_REV 0x0000U - #warning "__CA_REV not defined in device header file; using default!" - #endif - - #ifndef __FPU_PRESENT - #define __FPU_PRESENT 0U - #warning "__FPU_PRESENT not defined in device header file; using default!" - #endif - - #ifndef __GIC_PRESENT - #define __GIC_PRESENT 1U - #warning "__GIC_PRESENT not defined in device header file; using default!" - #endif - - #ifndef __TIM_PRESENT - #define __TIM_PRESENT 1U - #warning "__TIM_PRESENT not defined in device header file; using default!" - #endif - - #ifndef __L2C_PRESENT - #define __L2C_PRESENT 0U - #warning "__L2C_PRESENT not defined in device header file; using default!" - #endif -#endif - -/* IO definitions (access restrictions to peripheral registers) */ -#ifdef __cplusplus - #define __I volatile /*!< \brief Defines 'read only' permissions */ -#else - #define __I volatile const /*!< \brief Defines 'read only' permissions */ -#endif -#define __O volatile /*!< \brief Defines 'write only' permissions */ -#define __IO volatile /*!< \brief Defines 'read / write' permissions */ - -/* following defines should be used for structure members */ -#define __IM volatile const /*!< \brief Defines 'read only' structure member permissions */ -#define __OM volatile /*!< \brief Defines 'write only' structure member permissions */ -#define __IOM volatile /*!< \brief Defines 'read / write' structure member permissions */ -#define RESERVED(N, T) T RESERVED##N; // placeholder struct members used for "reserved" areas - - /******************************************************************************* - * Register Abstraction - Core Register contain: - - CPSR - - CP15 Registers - - L2C-310 Cache Controller - - Generic Interrupt Controller Distributor - - Generic Interrupt Controller Interface - ******************************************************************************/ - -/* Core Register CPSR */ -typedef union -{ - struct - { - uint32_t M:5; /*!< \brief bit: 0.. 4 Mode field */ - uint32_t T:1; /*!< \brief bit: 5 Thumb execution state bit */ - uint32_t F:1; /*!< \brief bit: 6 FIQ mask bit */ - uint32_t I:1; /*!< \brief bit: 7 IRQ mask bit */ - uint32_t A:1; /*!< \brief bit: 8 Asynchronous abort mask bit */ - uint32_t E:1; /*!< \brief bit: 9 Endianness execution state bit */ - uint32_t IT1:6; /*!< \brief bit: 10..15 If-Then execution state bits 2-7 */ - uint32_t GE:4; /*!< \brief bit: 16..19 Greater than or Equal flags */ - RESERVED(0:4, uint32_t) - uint32_t J:1; /*!< \brief bit: 24 Jazelle bit */ - uint32_t IT0:2; /*!< \brief bit: 25..26 If-Then execution state bits 0-1 */ - uint32_t Q:1; /*!< \brief bit: 27 Saturation condition flag */ - uint32_t V:1; /*!< \brief bit: 28 Overflow condition code flag */ - uint32_t C:1; /*!< \brief bit: 29 Carry condition code flag */ - uint32_t Z:1; /*!< \brief bit: 30 Zero condition code flag */ - uint32_t N:1; /*!< \brief bit: 31 Negative condition code flag */ - } b; /*!< \brief Structure used for bit access */ - uint32_t w; /*!< \brief Type used for word access */ -} CPSR_Type; - - - -/* CPSR Register Definitions */ -#define CPSR_N_Pos 31U /*!< \brief CPSR: N Position */ -#define CPSR_N_Msk (1UL << CPSR_N_Pos) /*!< \brief CPSR: N Mask */ - -#define CPSR_Z_Pos 30U /*!< \brief CPSR: Z Position */ -#define CPSR_Z_Msk (1UL << CPSR_Z_Pos) /*!< \brief CPSR: Z Mask */ - -#define CPSR_C_Pos 29U /*!< \brief CPSR: C Position */ -#define CPSR_C_Msk (1UL << CPSR_C_Pos) /*!< \brief CPSR: C Mask */ - -#define CPSR_V_Pos 28U /*!< \brief CPSR: V Position */ -#define CPSR_V_Msk (1UL << CPSR_V_Pos) /*!< \brief CPSR: V Mask */ - -#define CPSR_Q_Pos 27U /*!< \brief CPSR: Q Position */ -#define CPSR_Q_Msk (1UL << CPSR_Q_Pos) /*!< \brief CPSR: Q Mask */ - -#define CPSR_IT0_Pos 25U /*!< \brief CPSR: IT0 Position */ -#define CPSR_IT0_Msk (3UL << CPSR_IT0_Pos) /*!< \brief CPSR: IT0 Mask */ - -#define CPSR_J_Pos 24U /*!< \brief CPSR: J Position */ -#define CPSR_J_Msk (1UL << CPSR_J_Pos) /*!< \brief CPSR: J Mask */ - -#define CPSR_GE_Pos 16U /*!< \brief CPSR: GE Position */ -#define CPSR_GE_Msk (0xFUL << CPSR_GE_Pos) /*!< \brief CPSR: GE Mask */ - -#define CPSR_IT1_Pos 10U /*!< \brief CPSR: IT1 Position */ -#define CPSR_IT1_Msk (0x3FUL << CPSR_IT1_Pos) /*!< \brief CPSR: IT1 Mask */ - -#define CPSR_E_Pos 9U /*!< \brief CPSR: E Position */ -#define CPSR_E_Msk (1UL << CPSR_E_Pos) /*!< \brief CPSR: E Mask */ - -#define CPSR_A_Pos 8U /*!< \brief CPSR: A Position */ -#define CPSR_A_Msk (1UL << CPSR_A_Pos) /*!< \brief CPSR: A Mask */ - -#define CPSR_I_Pos 7U /*!< \brief CPSR: I Position */ -#define CPSR_I_Msk (1UL << CPSR_I_Pos) /*!< \brief CPSR: I Mask */ - -#define CPSR_F_Pos 6U /*!< \brief CPSR: F Position */ -#define CPSR_F_Msk (1UL << CPSR_F_Pos) /*!< \brief CPSR: F Mask */ - -#define CPSR_T_Pos 5U /*!< \brief CPSR: T Position */ -#define CPSR_T_Msk (1UL << CPSR_T_Pos) /*!< \brief CPSR: T Mask */ - -#define CPSR_M_Pos 0U /*!< \brief CPSR: M Position */ -#define CPSR_M_Msk (0x1FUL << CPSR_M_Pos) /*!< \brief CPSR: M Mask */ - -#define CPSR_M_USR 0x10U /*!< \brief CPSR: M User mode (PL0) */ -#define CPSR_M_FIQ 0x11U /*!< \brief CPSR: M Fast Interrupt mode (PL1) */ -#define CPSR_M_IRQ 0x12U /*!< \brief CPSR: M Interrupt mode (PL1) */ -#define CPSR_M_SVC 0x13U /*!< \brief CPSR: M Supervisor mode (PL1) */ -#define CPSR_M_MON 0x16U /*!< \brief CPSR: M Monitor mode (PL1) */ -#define CPSR_M_ABT 0x17U /*!< \brief CPSR: M Abort mode (PL1) */ -#define CPSR_M_HYP 0x1AU /*!< \brief CPSR: M Hypervisor mode (PL2) */ -#define CPSR_M_UND 0x1BU /*!< \brief CPSR: M Undefined mode (PL1) */ -#define CPSR_M_SYS 0x1FU /*!< \brief CPSR: M System mode (PL1) */ - -/* CP15 Register SCTLR */ -typedef union -{ - struct - { - uint32_t M:1; /*!< \brief bit: 0 MMU enable */ - uint32_t A:1; /*!< \brief bit: 1 Alignment check enable */ - uint32_t C:1; /*!< \brief bit: 2 Cache enable */ - RESERVED(0:2, uint32_t) - uint32_t CP15BEN:1; /*!< \brief bit: 5 CP15 barrier enable */ - RESERVED(1:1, uint32_t) - uint32_t B:1; /*!< \brief bit: 7 Endianness model */ - RESERVED(2:2, uint32_t) - uint32_t SW:1; /*!< \brief bit: 10 SWP and SWPB enable */ - uint32_t Z:1; /*!< \brief bit: 11 Branch prediction enable */ - uint32_t I:1; /*!< \brief bit: 12 Instruction cache enable */ - uint32_t V:1; /*!< \brief bit: 13 Vectors bit */ - uint32_t RR:1; /*!< \brief bit: 14 Round Robin select */ - RESERVED(3:2, uint32_t) - uint32_t HA:1; /*!< \brief bit: 17 Hardware Access flag enable */ - RESERVED(4:1, uint32_t) - uint32_t WXN:1; /*!< \brief bit: 19 Write permission implies XN */ - uint32_t UWXN:1; /*!< \brief bit: 20 Unprivileged write permission implies PL1 XN */ - uint32_t FI:1; /*!< \brief bit: 21 Fast interrupts configuration enable */ - uint32_t U:1; /*!< \brief bit: 22 Alignment model */ - RESERVED(5:1, uint32_t) - uint32_t VE:1; /*!< \brief bit: 24 Interrupt Vectors Enable */ - uint32_t EE:1; /*!< \brief bit: 25 Exception Endianness */ - RESERVED(6:1, uint32_t) - uint32_t NMFI:1; /*!< \brief bit: 27 Non-maskable FIQ (NMFI) support */ - uint32_t TRE:1; /*!< \brief bit: 28 TEX remap enable. */ - uint32_t AFE:1; /*!< \brief bit: 29 Access flag enable */ - uint32_t TE:1; /*!< \brief bit: 30 Thumb Exception enable */ - RESERVED(7:1, uint32_t) - } b; /*!< \brief Structure used for bit access */ - uint32_t w; /*!< \brief Type used for word access */ -} SCTLR_Type; - -#define SCTLR_TE_Pos 30U /*!< \brief SCTLR: TE Position */ -#define SCTLR_TE_Msk (1UL << SCTLR_TE_Pos) /*!< \brief SCTLR: TE Mask */ - -#define SCTLR_AFE_Pos 29U /*!< \brief SCTLR: AFE Position */ -#define SCTLR_AFE_Msk (1UL << SCTLR_AFE_Pos) /*!< \brief SCTLR: AFE Mask */ - -#define SCTLR_TRE_Pos 28U /*!< \brief SCTLR: TRE Position */ -#define SCTLR_TRE_Msk (1UL << SCTLR_TRE_Pos) /*!< \brief SCTLR: TRE Mask */ - -#define SCTLR_NMFI_Pos 27U /*!< \brief SCTLR: NMFI Position */ -#define SCTLR_NMFI_Msk (1UL << SCTLR_NMFI_Pos) /*!< \brief SCTLR: NMFI Mask */ - -#define SCTLR_EE_Pos 25U /*!< \brief SCTLR: EE Position */ -#define SCTLR_EE_Msk (1UL << SCTLR_EE_Pos) /*!< \brief SCTLR: EE Mask */ - -#define SCTLR_VE_Pos 24U /*!< \brief SCTLR: VE Position */ -#define SCTLR_VE_Msk (1UL << SCTLR_VE_Pos) /*!< \brief SCTLR: VE Mask */ - -#define SCTLR_U_Pos 22U /*!< \brief SCTLR: U Position */ -#define SCTLR_U_Msk (1UL << SCTLR_U_Pos) /*!< \brief SCTLR: U Mask */ - -#define SCTLR_FI_Pos 21U /*!< \brief SCTLR: FI Position */ -#define SCTLR_FI_Msk (1UL << SCTLR_FI_Pos) /*!< \brief SCTLR: FI Mask */ - -#define SCTLR_UWXN_Pos 20U /*!< \brief SCTLR: UWXN Position */ -#define SCTLR_UWXN_Msk (1UL << SCTLR_UWXN_Pos) /*!< \brief SCTLR: UWXN Mask */ - -#define SCTLR_WXN_Pos 19U /*!< \brief SCTLR: WXN Position */ -#define SCTLR_WXN_Msk (1UL << SCTLR_WXN_Pos) /*!< \brief SCTLR: WXN Mask */ - -#define SCTLR_HA_Pos 17U /*!< \brief SCTLR: HA Position */ -#define SCTLR_HA_Msk (1UL << SCTLR_HA_Pos) /*!< \brief SCTLR: HA Mask */ - -#define SCTLR_RR_Pos 14U /*!< \brief SCTLR: RR Position */ -#define SCTLR_RR_Msk (1UL << SCTLR_RR_Pos) /*!< \brief SCTLR: RR Mask */ - -#define SCTLR_V_Pos 13U /*!< \brief SCTLR: V Position */ -#define SCTLR_V_Msk (1UL << SCTLR_V_Pos) /*!< \brief SCTLR: V Mask */ - -#define SCTLR_I_Pos 12U /*!< \brief SCTLR: I Position */ -#define SCTLR_I_Msk (1UL << SCTLR_I_Pos) /*!< \brief SCTLR: I Mask */ - -#define SCTLR_Z_Pos 11U /*!< \brief SCTLR: Z Position */ -#define SCTLR_Z_Msk (1UL << SCTLR_Z_Pos) /*!< \brief SCTLR: Z Mask */ - -#define SCTLR_SW_Pos 10U /*!< \brief SCTLR: SW Position */ -#define SCTLR_SW_Msk (1UL << SCTLR_SW_Pos) /*!< \brief SCTLR: SW Mask */ - -#define SCTLR_B_Pos 7U /*!< \brief SCTLR: B Position */ -#define SCTLR_B_Msk (1UL << SCTLR_B_Pos) /*!< \brief SCTLR: B Mask */ - -#define SCTLR_CP15BEN_Pos 5U /*!< \brief SCTLR: CP15BEN Position */ -#define SCTLR_CP15BEN_Msk (1UL << SCTLR_CP15BEN_Pos) /*!< \brief SCTLR: CP15BEN Mask */ - -#define SCTLR_C_Pos 2U /*!< \brief SCTLR: C Position */ -#define SCTLR_C_Msk (1UL << SCTLR_C_Pos) /*!< \brief SCTLR: C Mask */ - -#define SCTLR_A_Pos 1U /*!< \brief SCTLR: A Position */ -#define SCTLR_A_Msk (1UL << SCTLR_A_Pos) /*!< \brief SCTLR: A Mask */ - -#define SCTLR_M_Pos 0U /*!< \brief SCTLR: M Position */ -#define SCTLR_M_Msk (1UL << SCTLR_M_Pos) /*!< \brief SCTLR: M Mask */ - -/* CP15 Register ACTLR */ -typedef union -{ -#if __CORTEX_A == 5 || defined(DOXYGEN) - /** \brief Structure used for bit access on Cortex-A5 */ - struct - { - uint32_t FW:1; /*!< \brief bit: 0 Cache and TLB maintenance broadcast */ - RESERVED(0:5, uint32_t) - uint32_t SMP:1; /*!< \brief bit: 6 Enables coherent requests to the processor */ - uint32_t EXCL:1; /*!< \brief bit: 7 Exclusive L1/L2 cache control */ - RESERVED(1:2, uint32_t) - uint32_t DODMBS:1; /*!< \brief bit: 10 Disable optimized data memory barrier behavior */ - uint32_t DWBST:1; /*!< \brief bit: 11 AXI data write bursts to Normal memory */ - uint32_t RADIS:1; /*!< \brief bit: 12 L1 Data Cache read-allocate mode disable */ - uint32_t L1PCTL:2; /*!< \brief bit:13..14 L1 Data prefetch control */ - uint32_t BP:2; /*!< \brief bit:16..15 Branch prediction policy */ - uint32_t RSDIS:1; /*!< \brief bit: 17 Disable return stack operation */ - uint32_t BTDIS:1; /*!< \brief bit: 18 Disable indirect Branch Target Address Cache (BTAC) */ - RESERVED(3:9, uint32_t) - uint32_t DBDI:1; /*!< \brief bit: 28 Disable branch dual issue */ - RESERVED(7:3, uint32_t) - } b; -#endif -#if __CORTEX_A == 7 || defined(DOXYGEN) - /** \brief Structure used for bit access on Cortex-A7 */ - struct - { - RESERVED(0:6, uint32_t) - uint32_t SMP:1; /*!< \brief bit: 6 Enables coherent requests to the processor */ - RESERVED(1:3, uint32_t) - uint32_t DODMBS:1; /*!< \brief bit: 10 Disable optimized data memory barrier behavior */ - uint32_t L2RADIS:1; /*!< \brief bit: 11 L2 Data Cache read-allocate mode disable */ - uint32_t L1RADIS:1; /*!< \brief bit: 12 L1 Data Cache read-allocate mode disable */ - uint32_t L1PCTL:2; /*!< \brief bit:13..14 L1 Data prefetch control */ - uint32_t DDVM:1; /*!< \brief bit: 15 Disable Distributed Virtual Memory (DVM) transactions */ - RESERVED(3:12, uint32_t) - uint32_t DDI:1; /*!< \brief bit: 28 Disable dual issue */ - RESERVED(7:3, uint32_t) - } b; -#endif -#if __CORTEX_A == 9 || defined(DOXYGEN) - /** \brief Structure used for bit access on Cortex-A9 */ - struct - { - uint32_t FW:1; /*!< \brief bit: 0 Cache and TLB maintenance broadcast */ - RESERVED(0:1, uint32_t) - uint32_t L1PE:1; /*!< \brief bit: 2 Dside prefetch */ - uint32_t WFLZM:1; /*!< \brief bit: 3 Cache and TLB maintenance broadcast */ - RESERVED(1:2, uint32_t) - uint32_t SMP:1; /*!< \brief bit: 6 Enables coherent requests to the processor */ - uint32_t EXCL:1; /*!< \brief bit: 7 Exclusive L1/L2 cache control */ - uint32_t AOW:1; /*!< \brief bit: 8 Enable allocation in one cache way only */ - uint32_t PARITY:1; /*!< \brief bit: 9 Support for parity checking, if implemented */ - RESERVED(7:22, uint32_t) - } b; -#endif - uint32_t w; /*!< \brief Type used for word access */ -} ACTLR_Type; - -#define ACTLR_DDI_Pos 28U /*!< \brief ACTLR: DDI Position */ -#define ACTLR_DDI_Msk (1UL << ACTLR_DDI_Pos) /*!< \brief ACTLR: DDI Mask */ - -#define ACTLR_DBDI_Pos 28U /*!< \brief ACTLR: DBDI Position */ -#define ACTLR_DBDI_Msk (1UL << ACTLR_DBDI_Pos) /*!< \brief ACTLR: DBDI Mask */ - -#define ACTLR_BTDIS_Pos 18U /*!< \brief ACTLR: BTDIS Position */ -#define ACTLR_BTDIS_Msk (1UL << ACTLR_BTDIS_Pos) /*!< \brief ACTLR: BTDIS Mask */ - -#define ACTLR_RSDIS_Pos 17U /*!< \brief ACTLR: RSDIS Position */ -#define ACTLR_RSDIS_Msk (1UL << ACTLR_RSDIS_Pos) /*!< \brief ACTLR: RSDIS Mask */ - -#define ACTLR_BP_Pos 15U /*!< \brief ACTLR: BP Position */ -#define ACTLR_BP_Msk (3UL << ACTLR_BP_Pos) /*!< \brief ACTLR: BP Mask */ - -#define ACTLR_DDVM_Pos 15U /*!< \brief ACTLR: DDVM Position */ -#define ACTLR_DDVM_Msk (1UL << ACTLR_DDVM_Pos) /*!< \brief ACTLR: DDVM Mask */ - -#define ACTLR_L1PCTL_Pos 13U /*!< \brief ACTLR: L1PCTL Position */ -#define ACTLR_L1PCTL_Msk (3UL << ACTLR_L1PCTL_Pos) /*!< \brief ACTLR: L1PCTL Mask */ - -#define ACTLR_RADIS_Pos 12U /*!< \brief ACTLR: RADIS Position */ -#define ACTLR_RADIS_Msk (1UL << ACTLR_RADIS_Pos) /*!< \brief ACTLR: RADIS Mask */ - -#define ACTLR_L1RADIS_Pos 12U /*!< \brief ACTLR: L1RADIS Position */ -#define ACTLR_L1RADIS_Msk (1UL << ACTLR_L1RADIS_Pos) /*!< \brief ACTLR: L1RADIS Mask */ - -#define ACTLR_DWBST_Pos 11U /*!< \brief ACTLR: DWBST Position */ -#define ACTLR_DWBST_Msk (1UL << ACTLR_DWBST_Pos) /*!< \brief ACTLR: DWBST Mask */ - -#define ACTLR_L2RADIS_Pos 11U /*!< \brief ACTLR: L2RADIS Position */ -#define ACTLR_L2RADIS_Msk (1UL << ACTLR_L2RADIS_Pos) /*!< \brief ACTLR: L2RADIS Mask */ - -#define ACTLR_DODMBS_Pos 10U /*!< \brief ACTLR: DODMBS Position */ -#define ACTLR_DODMBS_Msk (1UL << ACTLR_DODMBS_Pos) /*!< \brief ACTLR: DODMBS Mask */ - -#define ACTLR_PARITY_Pos 9U /*!< \brief ACTLR: PARITY Position */ -#define ACTLR_PARITY_Msk (1UL << ACTLR_PARITY_Pos) /*!< \brief ACTLR: PARITY Mask */ - -#define ACTLR_AOW_Pos 8U /*!< \brief ACTLR: AOW Position */ -#define ACTLR_AOW_Msk (1UL << ACTLR_AOW_Pos) /*!< \brief ACTLR: AOW Mask */ - -#define ACTLR_EXCL_Pos 7U /*!< \brief ACTLR: EXCL Position */ -#define ACTLR_EXCL_Msk (1UL << ACTLR_EXCL_Pos) /*!< \brief ACTLR: EXCL Mask */ - -#define ACTLR_SMP_Pos 6U /*!< \brief ACTLR: SMP Position */ -#define ACTLR_SMP_Msk (1UL << ACTLR_SMP_Pos) /*!< \brief ACTLR: SMP Mask */ - -#define ACTLR_WFLZM_Pos 3U /*!< \brief ACTLR: WFLZM Position */ -#define ACTLR_WFLZM_Msk (1UL << ACTLR_WFLZM_Pos) /*!< \brief ACTLR: WFLZM Mask */ - -#define ACTLR_L1PE_Pos 2U /*!< \brief ACTLR: L1PE Position */ -#define ACTLR_L1PE_Msk (1UL << ACTLR_L1PE_Pos) /*!< \brief ACTLR: L1PE Mask */ - -#define ACTLR_FW_Pos 0U /*!< \brief ACTLR: FW Position */ -#define ACTLR_FW_Msk (1UL << ACTLR_FW_Pos) /*!< \brief ACTLR: FW Mask */ - -/* CP15 Register CPACR */ -typedef union -{ - struct - { - uint32_t CP0:2; /*!< \brief bit: 0..1 Access rights for coprocessor 0 */ - uint32_t CP1:2; /*!< \brief bit: 2..3 Access rights for coprocessor 1 */ - uint32_t CP2:2; /*!< \brief bit: 4..5 Access rights for coprocessor 2 */ - uint32_t CP3:2; /*!< \brief bit: 6..7 Access rights for coprocessor 3 */ - uint32_t CP4:2; /*!< \brief bit: 8..9 Access rights for coprocessor 4 */ - uint32_t CP5:2; /*!< \brief bit:10..11 Access rights for coprocessor 5 */ - uint32_t CP6:2; /*!< \brief bit:12..13 Access rights for coprocessor 6 */ - uint32_t CP7:2; /*!< \brief bit:14..15 Access rights for coprocessor 7 */ - uint32_t CP8:2; /*!< \brief bit:16..17 Access rights for coprocessor 8 */ - uint32_t CP9:2; /*!< \brief bit:18..19 Access rights for coprocessor 9 */ - uint32_t CP10:2; /*!< \brief bit:20..21 Access rights for coprocessor 10 */ - uint32_t CP11:2; /*!< \brief bit:22..23 Access rights for coprocessor 11 */ - uint32_t CP12:2; /*!< \brief bit:24..25 Access rights for coprocessor 11 */ - uint32_t CP13:2; /*!< \brief bit:26..27 Access rights for coprocessor 11 */ - uint32_t TRCDIS:1; /*!< \brief bit: 28 Disable CP14 access to trace registers */ - RESERVED(0:1, uint32_t) - uint32_t D32DIS:1; /*!< \brief bit: 30 Disable use of registers D16-D31 of the VFP register file */ - uint32_t ASEDIS:1; /*!< \brief bit: 31 Disable Advanced SIMD Functionality */ - } b; /*!< \brief Structure used for bit access */ - uint32_t w; /*!< \brief Type used for word access */ -} CPACR_Type; - -#define CPACR_ASEDIS_Pos 31U /*!< \brief CPACR: ASEDIS Position */ -#define CPACR_ASEDIS_Msk (1UL << CPACR_ASEDIS_Pos) /*!< \brief CPACR: ASEDIS Mask */ - -#define CPACR_D32DIS_Pos 30U /*!< \brief CPACR: D32DIS Position */ -#define CPACR_D32DIS_Msk (1UL << CPACR_D32DIS_Pos) /*!< \brief CPACR: D32DIS Mask */ - -#define CPACR_TRCDIS_Pos 28U /*!< \brief CPACR: D32DIS Position */ -#define CPACR_TRCDIS_Msk (1UL << CPACR_D32DIS_Pos) /*!< \brief CPACR: D32DIS Mask */ - -#define CPACR_CP_Pos_(n) (n*2U) /*!< \brief CPACR: CPn Position */ -#define CPACR_CP_Msk_(n) (3UL << CPACR_CP_Pos_(n)) /*!< \brief CPACR: CPn Mask */ - -#define CPACR_CP_NA 0U /*!< \brief CPACR CPn field: Access denied. */ -#define CPACR_CP_PL1 1U /*!< \brief CPACR CPn field: Accessible from PL1 only. */ -#define CPACR_CP_FA 3U /*!< \brief CPACR CPn field: Full access. */ - -/* CP15 Register DFSR */ -typedef union -{ - struct - { - uint32_t FS0:4; /*!< \brief bit: 0.. 3 Fault Status bits bit 0-3 */ - uint32_t Domain:4; /*!< \brief bit: 4.. 7 Fault on which domain */ - RESERVED(0:1, uint32_t) - uint32_t LPAE:1; /*!< \brief bit: 9 Large Physical Address Extension */ - uint32_t FS1:1; /*!< \brief bit: 10 Fault Status bits bit 4 */ - uint32_t WnR:1; /*!< \brief bit: 11 Write not Read bit */ - uint32_t ExT:1; /*!< \brief bit: 12 External abort type */ - uint32_t CM:1; /*!< \brief bit: 13 Cache maintenance fault */ - RESERVED(1:18, uint32_t) - } s; /*!< \brief Structure used for bit access in short format */ - struct - { - uint32_t STATUS:5; /*!< \brief bit: 0.. 5 Fault Status bits */ - RESERVED(0:3, uint32_t) - uint32_t LPAE:1; /*!< \brief bit: 9 Large Physical Address Extension */ - RESERVED(1:1, uint32_t) - uint32_t WnR:1; /*!< \brief bit: 11 Write not Read bit */ - uint32_t ExT:1; /*!< \brief bit: 12 External abort type */ - uint32_t CM:1; /*!< \brief bit: 13 Cache maintenance fault */ - RESERVED(2:18, uint32_t) - } l; /*!< \brief Structure used for bit access in long format */ - uint32_t w; /*!< \brief Type used for word access */ -} DFSR_Type; - -#define DFSR_CM_Pos 13U /*!< \brief DFSR: CM Position */ -#define DFSR_CM_Msk (1UL << DFSR_CM_Pos) /*!< \brief DFSR: CM Mask */ - -#define DFSR_Ext_Pos 12U /*!< \brief DFSR: Ext Position */ -#define DFSR_Ext_Msk (1UL << DFSR_Ext_Pos) /*!< \brief DFSR: Ext Mask */ - -#define DFSR_WnR_Pos 11U /*!< \brief DFSR: WnR Position */ -#define DFSR_WnR_Msk (1UL << DFSR_WnR_Pos) /*!< \brief DFSR: WnR Mask */ - -#define DFSR_FS1_Pos 10U /*!< \brief DFSR: FS1 Position */ -#define DFSR_FS1_Msk (1UL << DFSR_FS1_Pos) /*!< \brief DFSR: FS1 Mask */ - -#define DFSR_LPAE_Pos 9U /*!< \brief DFSR: LPAE Position */ -#define DFSR_LPAE_Msk (1UL << DFSR_LPAE_Pos) /*!< \brief DFSR: LPAE Mask */ - -#define DFSR_Domain_Pos 4U /*!< \brief DFSR: Domain Position */ -#define DFSR_Domain_Msk (0xFUL << DFSR_Domain_Pos) /*!< \brief DFSR: Domain Mask */ - -#define DFSR_FS0_Pos 0U /*!< \brief DFSR: FS0 Position */ -#define DFSR_FS0_Msk (0xFUL << DFSR_FS0_Pos) /*!< \brief DFSR: FS0 Mask */ - -#define DFSR_STATUS_Pos 0U /*!< \brief DFSR: STATUS Position */ -#define DFSR_STATUS_Msk (0x3FUL << DFSR_STATUS_Pos) /*!< \brief DFSR: STATUS Mask */ - -/* CP15 Register IFSR */ -typedef union -{ - struct - { - uint32_t FS0:4; /*!< \brief bit: 0.. 3 Fault Status bits bit 0-3 */ - RESERVED(0:5, uint32_t) - uint32_t LPAE:1; /*!< \brief bit: 9 Large Physical Address Extension */ - uint32_t FS1:1; /*!< \brief bit: 10 Fault Status bits bit 4 */ - RESERVED(1:1, uint32_t) - uint32_t ExT:1; /*!< \brief bit: 12 External abort type */ - RESERVED(2:19, uint32_t) - } s; /*!< \brief Structure used for bit access in short format */ - struct - { - uint32_t STATUS:6; /*!< \brief bit: 0.. 5 Fault Status bits */ - RESERVED(0:3, uint32_t) - uint32_t LPAE:1; /*!< \brief bit: 9 Large Physical Address Extension */ - RESERVED(1:2, uint32_t) - uint32_t ExT:1; /*!< \brief bit: 12 External abort type */ - RESERVED(2:19, uint32_t) - } l; /*!< \brief Structure used for bit access in long format */ - uint32_t w; /*!< \brief Type used for word access */ -} IFSR_Type; - -#define IFSR_ExT_Pos 12U /*!< \brief IFSR: ExT Position */ -#define IFSR_ExT_Msk (1UL << IFSR_ExT_Pos) /*!< \brief IFSR: ExT Mask */ - -#define IFSR_FS1_Pos 10U /*!< \brief IFSR: FS1 Position */ -#define IFSR_FS1_Msk (1UL << IFSR_FS1_Pos) /*!< \brief IFSR: FS1 Mask */ - -#define IFSR_LPAE_Pos 9U /*!< \brief IFSR: LPAE Position */ -#define IFSR_LPAE_Msk (0x1UL << IFSR_LPAE_Pos) /*!< \brief IFSR: LPAE Mask */ - -#define IFSR_FS0_Pos 0U /*!< \brief IFSR: FS0 Position */ -#define IFSR_FS0_Msk (0xFUL << IFSR_FS0_Pos) /*!< \brief IFSR: FS0 Mask */ - -#define IFSR_STATUS_Pos 0U /*!< \brief IFSR: STATUS Position */ -#define IFSR_STATUS_Msk (0x3FUL << IFSR_STATUS_Pos) /*!< \brief IFSR: STATUS Mask */ - -/* CP15 Register ISR */ -typedef union -{ - struct - { - RESERVED(0:6, uint32_t) - uint32_t F:1; /*!< \brief bit: 6 FIQ pending bit */ - uint32_t I:1; /*!< \brief bit: 7 IRQ pending bit */ - uint32_t A:1; /*!< \brief bit: 8 External abort pending bit */ - RESERVED(1:23, uint32_t) - } b; /*!< \brief Structure used for bit access */ - uint32_t w; /*!< \brief Type used for word access */ -} ISR_Type; - -#define ISR_A_Pos 13U /*!< \brief ISR: A Position */ -#define ISR_A_Msk (1UL << ISR_A_Pos) /*!< \brief ISR: A Mask */ - -#define ISR_I_Pos 12U /*!< \brief ISR: I Position */ -#define ISR_I_Msk (1UL << ISR_I_Pos) /*!< \brief ISR: I Mask */ - -#define ISR_F_Pos 11U /*!< \brief ISR: F Position */ -#define ISR_F_Msk (1UL << ISR_F_Pos) /*!< \brief ISR: F Mask */ - -/* DACR Register */ -#define DACR_D_Pos_(n) (2U*n) /*!< \brief DACR: Dn Position */ -#define DACR_D_Msk_(n) (3UL << DACR_D_Pos_(n)) /*!< \brief DACR: Dn Mask */ -#define DACR_Dn_NOACCESS 0U /*!< \brief DACR Dn field: No access */ -#define DACR_Dn_CLIENT 1U /*!< \brief DACR Dn field: Client */ -#define DACR_Dn_MANAGER 3U /*!< \brief DACR Dn field: Manager */ - -/** - \brief Mask and shift a bit field value for use in a register bit range. - \param [in] field Name of the register bit field. - \param [in] value Value of the bit field. This parameter is interpreted as an uint32_t type. - \return Masked and shifted value. -*/ -#define _VAL2FLD(field, value) (((uint32_t)(value) << field ## _Pos) & field ## _Msk) - -/** - \brief Mask and shift a register value to extract a bit filed value. - \param [in] field Name of the register bit field. - \param [in] value Value of register. This parameter is interpreted as an uint32_t type. - \return Masked and shifted bit field value. -*/ -#define _FLD2VAL(field, value) (((uint32_t)(value) & field ## _Msk) >> field ## _Pos) - - -/** - \brief Union type to access the L2C_310 Cache Controller. -*/ -#if (__L2C_PRESENT == 1U) || defined(DOXYGEN) -typedef struct -{ - __IM uint32_t CACHE_ID; /*!< \brief Offset: 0x0000 (R/ ) Cache ID Register */ - __IM uint32_t CACHE_TYPE; /*!< \brief Offset: 0x0004 (R/ ) Cache Type Register */ - RESERVED(0[0x3e], uint32_t) - __IOM uint32_t CONTROL; /*!< \brief Offset: 0x0100 (R/W) Control Register */ - __IOM uint32_t AUX_CNT; /*!< \brief Offset: 0x0104 (R/W) Auxiliary Control */ - RESERVED(1[0x3e], uint32_t) - __IOM uint32_t EVENT_CONTROL; /*!< \brief Offset: 0x0200 (R/W) Event Counter Control */ - __IOM uint32_t EVENT_COUNTER1_CONF; /*!< \brief Offset: 0x0204 (R/W) Event Counter 1 Configuration */ - __IOM uint32_t EVENT_COUNTER0_CONF; /*!< \brief Offset: 0x0208 (R/W) Event Counter 1 Configuration */ - RESERVED(2[0x2], uint32_t) - __IOM uint32_t INTERRUPT_MASK; /*!< \brief Offset: 0x0214 (R/W) Interrupt Mask */ - __IM uint32_t MASKED_INT_STATUS; /*!< \brief Offset: 0x0218 (R/ ) Masked Interrupt Status */ - __IM uint32_t RAW_INT_STATUS; /*!< \brief Offset: 0x021c (R/ ) Raw Interrupt Status */ - __OM uint32_t INTERRUPT_CLEAR; /*!< \brief Offset: 0x0220 ( /W) Interrupt Clear */ - RESERVED(3[0x143], uint32_t) - __IOM uint32_t CACHE_SYNC; /*!< \brief Offset: 0x0730 (R/W) Cache Sync */ - RESERVED(4[0xf], uint32_t) - __IOM uint32_t INV_LINE_PA; /*!< \brief Offset: 0x0770 (R/W) Invalidate Line By PA */ - RESERVED(6[2], uint32_t) - __IOM uint32_t INV_WAY; /*!< \brief Offset: 0x077c (R/W) Invalidate by Way */ - RESERVED(5[0xc], uint32_t) - __IOM uint32_t CLEAN_LINE_PA; /*!< \brief Offset: 0x07b0 (R/W) Clean Line by PA */ - RESERVED(7[1], uint32_t) - __IOM uint32_t CLEAN_LINE_INDEX_WAY; /*!< \brief Offset: 0x07b8 (R/W) Clean Line by Index/Way */ - __IOM uint32_t CLEAN_WAY; /*!< \brief Offset: 0x07bc (R/W) Clean by Way */ - RESERVED(8[0xc], uint32_t) - __IOM uint32_t CLEAN_INV_LINE_PA; /*!< \brief Offset: 0x07f0 (R/W) Clean and Invalidate Line by PA */ - RESERVED(9[1], uint32_t) - __IOM uint32_t CLEAN_INV_LINE_INDEX_WAY; /*!< \brief Offset: 0x07f8 (R/W) Clean and Invalidate Line by Index/Way */ - __IOM uint32_t CLEAN_INV_WAY; /*!< \brief Offset: 0x07fc (R/W) Clean and Invalidate by Way */ - RESERVED(10[0x40], uint32_t) - __IOM uint32_t DATA_LOCK_0_WAY; /*!< \brief Offset: 0x0900 (R/W) Data Lockdown 0 by Way */ - __IOM uint32_t INST_LOCK_0_WAY; /*!< \brief Offset: 0x0904 (R/W) Instruction Lockdown 0 by Way */ - __IOM uint32_t DATA_LOCK_1_WAY; /*!< \brief Offset: 0x0908 (R/W) Data Lockdown 1 by Way */ - __IOM uint32_t INST_LOCK_1_WAY; /*!< \brief Offset: 0x090c (R/W) Instruction Lockdown 1 by Way */ - __IOM uint32_t DATA_LOCK_2_WAY; /*!< \brief Offset: 0x0910 (R/W) Data Lockdown 2 by Way */ - __IOM uint32_t INST_LOCK_2_WAY; /*!< \brief Offset: 0x0914 (R/W) Instruction Lockdown 2 by Way */ - __IOM uint32_t DATA_LOCK_3_WAY; /*!< \brief Offset: 0x0918 (R/W) Data Lockdown 3 by Way */ - __IOM uint32_t INST_LOCK_3_WAY; /*!< \brief Offset: 0x091c (R/W) Instruction Lockdown 3 by Way */ - __IOM uint32_t DATA_LOCK_4_WAY; /*!< \brief Offset: 0x0920 (R/W) Data Lockdown 4 by Way */ - __IOM uint32_t INST_LOCK_4_WAY; /*!< \brief Offset: 0x0924 (R/W) Instruction Lockdown 4 by Way */ - __IOM uint32_t DATA_LOCK_5_WAY; /*!< \brief Offset: 0x0928 (R/W) Data Lockdown 5 by Way */ - __IOM uint32_t INST_LOCK_5_WAY; /*!< \brief Offset: 0x092c (R/W) Instruction Lockdown 5 by Way */ - __IOM uint32_t DATA_LOCK_6_WAY; /*!< \brief Offset: 0x0930 (R/W) Data Lockdown 5 by Way */ - __IOM uint32_t INST_LOCK_6_WAY; /*!< \brief Offset: 0x0934 (R/W) Instruction Lockdown 5 by Way */ - __IOM uint32_t DATA_LOCK_7_WAY; /*!< \brief Offset: 0x0938 (R/W) Data Lockdown 6 by Way */ - __IOM uint32_t INST_LOCK_7_WAY; /*!< \brief Offset: 0x093c (R/W) Instruction Lockdown 6 by Way */ - RESERVED(11[0x4], uint32_t) - __IOM uint32_t LOCK_LINE_EN; /*!< \brief Offset: 0x0950 (R/W) Lockdown by Line Enable */ - __IOM uint32_t UNLOCK_ALL_BY_WAY; /*!< \brief Offset: 0x0954 (R/W) Unlock All Lines by Way */ - RESERVED(12[0xaa], uint32_t) - __IOM uint32_t ADDRESS_FILTER_START; /*!< \brief Offset: 0x0c00 (R/W) Address Filtering Start */ - __IOM uint32_t ADDRESS_FILTER_END; /*!< \brief Offset: 0x0c04 (R/W) Address Filtering End */ - RESERVED(13[0xce], uint32_t) - __IOM uint32_t DEBUG_CONTROL; /*!< \brief Offset: 0x0f40 (R/W) Debug Control Register */ -} L2C_310_TypeDef; - -#define L2C_310 ((L2C_310_TypeDef *)L2C_310_BASE) /*!< \brief L2C_310 register set access pointer */ -#endif - -#if (__GIC_PRESENT == 1U) || defined(DOXYGEN) - -/** \brief Structure type to access the Generic Interrupt Controller Distributor (GICD) -*/ -typedef struct -{ - __IOM uint32_t CTLR; /*!< \brief Offset: 0x000 (R/W) Distributor Control Register */ - __IM uint32_t TYPER; /*!< \brief Offset: 0x004 (R/ ) Interrupt Controller Type Register */ - __IM uint32_t IIDR; /*!< \brief Offset: 0x008 (R/ ) Distributor Implementer Identification Register */ - RESERVED(0, uint32_t) - __IOM uint32_t STATUSR; /*!< \brief Offset: 0x010 (R/W) Error Reporting Status Register, optional */ - RESERVED(1[11], uint32_t) - __OM uint32_t SETSPI_NSR; /*!< \brief Offset: 0x040 ( /W) Set SPI Register */ - RESERVED(2, uint32_t) - __OM uint32_t CLRSPI_NSR; /*!< \brief Offset: 0x048 ( /W) Clear SPI Register */ - RESERVED(3, uint32_t) - __OM uint32_t SETSPI_SR; /*!< \brief Offset: 0x050 ( /W) Set SPI, Secure Register */ - RESERVED(4, uint32_t) - __OM uint32_t CLRSPI_SR; /*!< \brief Offset: 0x058 ( /W) Clear SPI, Secure Register */ - RESERVED(5[9], uint32_t) - __IOM uint32_t IGROUPR[32]; /*!< \brief Offset: 0x080 (R/W) Interrupt Group Registers */ - __IOM uint32_t ISENABLER[32]; /*!< \brief Offset: 0x100 (R/W) Interrupt Set-Enable Registers */ - __IOM uint32_t ICENABLER[32]; /*!< \brief Offset: 0x180 (R/W) Interrupt Clear-Enable Registers */ - __IOM uint32_t ISPENDR[32]; /*!< \brief Offset: 0x200 (R/W) Interrupt Set-Pending Registers */ - __IOM uint32_t ICPENDR[32]; /*!< \brief Offset: 0x280 (R/W) Interrupt Clear-Pending Registers */ - __IOM uint32_t ISACTIVER[32]; /*!< \brief Offset: 0x300 (R/W) Interrupt Set-Active Registers */ - __IOM uint32_t ICACTIVER[32]; /*!< \brief Offset: 0x380 (R/W) Interrupt Clear-Active Registers */ - __IOM uint32_t IPRIORITYR[255]; /*!< \brief Offset: 0x400 (R/W) Interrupt Priority Registers */ - RESERVED(6, uint32_t) - __IOM uint32_t ITARGETSR[255]; /*!< \brief Offset: 0x800 (R/W) Interrupt Targets Registers */ - RESERVED(7, uint32_t) - __IOM uint32_t ICFGR[64]; /*!< \brief Offset: 0xC00 (R/W) Interrupt Configuration Registers */ - __IOM uint32_t IGRPMODR[32]; /*!< \brief Offset: 0xD00 (R/W) Interrupt Group Modifier Registers */ - RESERVED(8[32], uint32_t) - __IOM uint32_t NSACR[64]; /*!< \brief Offset: 0xE00 (R/W) Non-secure Access Control Registers */ - __OM uint32_t SGIR; /*!< \brief Offset: 0xF00 ( /W) Software Generated Interrupt Register */ - RESERVED(9[3], uint32_t) - __IOM uint32_t CPENDSGIR[4]; /*!< \brief Offset: 0xF10 (R/W) SGI Clear-Pending Registers */ - __IOM uint32_t SPENDSGIR[4]; /*!< \brief Offset: 0xF20 (R/W) SGI Set-Pending Registers */ - RESERVED(10[5236], uint32_t) - __IOM uint64_t IROUTER[988]; /*!< \brief Offset: 0x6100(R/W) Interrupt Routing Registers */ -} GICDistributor_Type; - -#define GICDistributor ((GICDistributor_Type *) GIC_DISTRIBUTOR_BASE ) /*!< \brief GIC Distributor register set access pointer */ - -/** \brief Structure type to access the Generic Interrupt Controller Interface (GICC) -*/ -typedef struct -{ - __IOM uint32_t CTLR; /*!< \brief Offset: 0x000 (R/W) CPU Interface Control Register */ - __IOM uint32_t PMR; /*!< \brief Offset: 0x004 (R/W) Interrupt Priority Mask Register */ - __IOM uint32_t BPR; /*!< \brief Offset: 0x008 (R/W) Binary Point Register */ - __IM uint32_t IAR; /*!< \brief Offset: 0x00C (R/ ) Interrupt Acknowledge Register */ - __OM uint32_t EOIR; /*!< \brief Offset: 0x010 ( /W) End Of Interrupt Register */ - __IM uint32_t RPR; /*!< \brief Offset: 0x014 (R/ ) Running Priority Register */ - __IM uint32_t HPPIR; /*!< \brief Offset: 0x018 (R/ ) Highest Priority Pending Interrupt Register */ - __IOM uint32_t ABPR; /*!< \brief Offset: 0x01C (R/W) Aliased Binary Point Register */ - __IM uint32_t AIAR; /*!< \brief Offset: 0x020 (R/ ) Aliased Interrupt Acknowledge Register */ - __OM uint32_t AEOIR; /*!< \brief Offset: 0x024 ( /W) Aliased End Of Interrupt Register */ - __IM uint32_t AHPPIR; /*!< \brief Offset: 0x028 (R/ ) Aliased Highest Priority Pending Interrupt Register */ - __IOM uint32_t STATUSR; /*!< \brief Offset: 0x02C (R/W) Error Reporting Status Register, optional */ - RESERVED(1[40], uint32_t) - __IOM uint32_t APR[4]; /*!< \brief Offset: 0x0D0 (R/W) Active Priority Register */ - __IOM uint32_t NSAPR[4]; /*!< \brief Offset: 0x0E0 (R/W) Non-secure Active Priority Register */ - RESERVED(2[3], uint32_t) - __IM uint32_t IIDR; /*!< \brief Offset: 0x0FC (R/ ) CPU Interface Identification Register */ - RESERVED(3[960], uint32_t) - __OM uint32_t DIR; /*!< \brief Offset: 0x1000( /W) Deactivate Interrupt Register */ -} GICInterface_Type; - -#define GICInterface ((GICInterface_Type *) GIC_INTERFACE_BASE ) /*!< \brief GIC Interface register set access pointer */ -#endif - -#if (__TIM_PRESENT == 1U) || defined(DOXYGEN) -#if ((__CORTEX_A == 5U) || (__CORTEX_A == 9U)) || defined(DOXYGEN) -/** \brief Structure type to access the Private Timer -*/ -typedef struct -{ - __IOM uint32_t LOAD; //!< \brief Offset: 0x000 (R/W) Private Timer Load Register - __IOM uint32_t COUNTER; //!< \brief Offset: 0x004 (R/W) Private Timer Counter Register - __IOM uint32_t CONTROL; //!< \brief Offset: 0x008 (R/W) Private Timer Control Register - __IOM uint32_t ISR; //!< \brief Offset: 0x00C (R/W) Private Timer Interrupt Status Register - RESERVED(0[4], uint32_t) - __IOM uint32_t WLOAD; //!< \brief Offset: 0x020 (R/W) Watchdog Load Register - __IOM uint32_t WCOUNTER; //!< \brief Offset: 0x024 (R/W) Watchdog Counter Register - __IOM uint32_t WCONTROL; //!< \brief Offset: 0x028 (R/W) Watchdog Control Register - __IOM uint32_t WISR; //!< \brief Offset: 0x02C (R/W) Watchdog Interrupt Status Register - __IOM uint32_t WRESET; //!< \brief Offset: 0x030 (R/W) Watchdog Reset Status Register - __OM uint32_t WDISABLE; //!< \brief Offset: 0x034 ( /W) Watchdog Disable Register -} Timer_Type; -#define PTIM ((Timer_Type *) TIMER_BASE ) /*!< \brief Timer register struct */ -#endif -#endif - - /******************************************************************************* - * Hardware Abstraction Layer - Core Function Interface contains: - - L1 Cache Functions - - L2C-310 Cache Controller Functions - - PL1 Timer Functions - - GIC Functions - - MMU Functions - ******************************************************************************/ - -/* ########################## L1 Cache functions ################################# */ - -/** \brief Enable Caches by setting I and C bits in SCTLR register. -*/ -__STATIC_FORCEINLINE void L1C_EnableCaches(void) { - __set_SCTLR( __get_SCTLR() | SCTLR_I_Msk | SCTLR_C_Msk); - __ISB(); -} - -/** \brief Disable Caches by clearing I and C bits in SCTLR register. -*/ -__STATIC_FORCEINLINE void L1C_DisableCaches(void) { - __set_SCTLR( __get_SCTLR() & (~SCTLR_I_Msk) & (~SCTLR_C_Msk)); - __ISB(); -} - -/** \brief Enable Branch Prediction by setting Z bit in SCTLR register. -*/ -__STATIC_FORCEINLINE void L1C_EnableBTAC(void) { - __set_SCTLR( __get_SCTLR() | SCTLR_Z_Msk); - __ISB(); -} - -/** \brief Disable Branch Prediction by clearing Z bit in SCTLR register. -*/ -__STATIC_FORCEINLINE void L1C_DisableBTAC(void) { - __set_SCTLR( __get_SCTLR() & (~SCTLR_Z_Msk)); - __ISB(); -} - -/** \brief Invalidate entire branch predictor array -*/ -__STATIC_FORCEINLINE void L1C_InvalidateBTAC(void) { - __set_BPIALL(0); - __DSB(); //ensure completion of the invalidation - __ISB(); //ensure instruction fetch path sees new state -} - -/** \brief Invalidate the whole instruction cache -*/ -__STATIC_FORCEINLINE void L1C_InvalidateICacheAll(void) { - __set_ICIALLU(0); - __DSB(); //ensure completion of the invalidation - __ISB(); //ensure instruction fetch path sees new I cache state -} - -/** \brief Clean data cache line by address. -* \param [in] va Pointer to data to clear the cache for. -*/ -__STATIC_FORCEINLINE void L1C_CleanDCacheMVA(void *va) { - __set_DCCMVAC((uint32_t)va); - __DMB(); //ensure the ordering of data cache maintenance operations and their effects -} - -/** \brief Invalidate data cache line by address. -* \param [in] va Pointer to data to invalidate the cache for. -*/ -__STATIC_FORCEINLINE void L1C_InvalidateDCacheMVA(void *va) { - __set_DCIMVAC((uint32_t)va); - __DMB(); //ensure the ordering of data cache maintenance operations and their effects -} - -/** \brief Clean and Invalidate data cache by address. -* \param [in] va Pointer to data to invalidate the cache for. -*/ -__STATIC_FORCEINLINE void L1C_CleanInvalidateDCacheMVA(void *va) { - __set_DCCIMVAC((uint32_t)va); - __DMB(); //ensure the ordering of data cache maintenance operations and their effects -} - -/** \brief Calculate log2 rounded up -* - log(0) => 0 -* - log(1) => 0 -* - log(2) => 1 -* - log(3) => 2 -* - log(4) => 2 -* - log(5) => 3 -* : : -* - log(16) => 4 -* - log(32) => 5 -* : : -* \param [in] n input value parameter -* \return log2(n) -*/ -__STATIC_FORCEINLINE uint8_t __log2_up(uint32_t n) -{ - if (n < 2U) { - return 0U; - } - uint8_t log = 0U; - uint32_t t = n; - while(t > 1U) - { - log++; - t >>= 1U; - } - if (n & 1U) { log++; } - return log; -} - -/** \brief Apply cache maintenance to given cache level. -* \param [in] level cache level to be maintained -* \param [in] maint 0 - invalidate, 1 - clean, otherwise - invalidate and clean -*/ -__STATIC_FORCEINLINE void __L1C_MaintainDCacheSetWay(uint32_t level, uint32_t maint) -{ - uint32_t Dummy; - uint32_t ccsidr; - uint32_t num_sets; - uint32_t num_ways; - uint32_t shift_way; - uint32_t log2_linesize; - int32_t log2_num_ways; - - Dummy = level << 1U; - /* set csselr, select ccsidr register */ - __set_CSSELR(Dummy); - /* get current ccsidr register */ - ccsidr = __get_CCSIDR(); - num_sets = ((ccsidr & 0x0FFFE000U) >> 13U) + 1U; - num_ways = ((ccsidr & 0x00001FF8U) >> 3U) + 1U; - log2_linesize = (ccsidr & 0x00000007U) + 2U + 2U; - log2_num_ways = __log2_up(num_ways); - if ((log2_num_ways < 0) || (log2_num_ways > 32)) { - return; // FATAL ERROR - } - shift_way = 32U - (uint32_t)log2_num_ways; - for(int32_t way = num_ways-1; way >= 0; way--) - { - for(int32_t set = num_sets-1; set >= 0; set--) - { - Dummy = (level << 1U) | (((uint32_t)set) << log2_linesize) | (((uint32_t)way) << shift_way); - switch (maint) - { - case 0U: __set_DCISW(Dummy); break; - case 1U: __set_DCCSW(Dummy); break; - default: __set_DCCISW(Dummy); break; - } - } - } - __DMB(); -} - -/** \brief Clean and Invalidate the entire data or unified cache -* Generic mechanism for cleaning/invalidating the entire data or unified cache to the point of coherency -* \param [in] op 0 - invalidate, 1 - clean, otherwise - invalidate and clean -*/ -__STATIC_FORCEINLINE void L1C_CleanInvalidateCache(uint32_t op) { - uint32_t clidr; - uint32_t cache_type; - clidr = __get_CLIDR(); - for(uint32_t i = 0U; i<7U; i++) - { - cache_type = (clidr >> i*3U) & 0x7UL; - if ((cache_type >= 2U) && (cache_type <= 4U)) - { - __L1C_MaintainDCacheSetWay(i, op); - } - } -} - -/** \brief Clean and Invalidate the entire data or unified cache -* Generic mechanism for cleaning/invalidating the entire data or unified cache to the point of coherency -* \param [in] op 0 - invalidate, 1 - clean, otherwise - invalidate and clean -* \deprecated Use generic L1C_CleanInvalidateCache instead. -*/ -CMSIS_DEPRECATED -__STATIC_FORCEINLINE void __L1C_CleanInvalidateCache(uint32_t op) { - L1C_CleanInvalidateCache(op); -} - -/** \brief Invalidate the whole data cache. -*/ -__STATIC_FORCEINLINE void L1C_InvalidateDCacheAll(void) { - L1C_CleanInvalidateCache(0); -} - -/** \brief Clean the whole data cache. - */ -__STATIC_FORCEINLINE void L1C_CleanDCacheAll(void) { - L1C_CleanInvalidateCache(1); -} - -/** \brief Clean and invalidate the whole data cache. - */ -__STATIC_FORCEINLINE void L1C_CleanInvalidateDCacheAll(void) { - L1C_CleanInvalidateCache(2); -} - -/* ########################## L2 Cache functions ################################# */ -#if (__L2C_PRESENT == 1U) || defined(DOXYGEN) -/** \brief Cache Sync operation by writing CACHE_SYNC register. -*/ -__STATIC_INLINE void L2C_Sync(void) -{ - L2C_310->CACHE_SYNC = 0x0; -} - -/** \brief Read cache controller cache ID from CACHE_ID register. - * \return L2C_310_TypeDef::CACHE_ID - */ -__STATIC_INLINE int L2C_GetID (void) -{ - return L2C_310->CACHE_ID; -} - -/** \brief Read cache controller cache type from CACHE_TYPE register. -* \return L2C_310_TypeDef::CACHE_TYPE -*/ -__STATIC_INLINE int L2C_GetType (void) -{ - return L2C_310->CACHE_TYPE; -} - -/** \brief Invalidate all cache by way -*/ -__STATIC_INLINE void L2C_InvAllByWay (void) -{ - unsigned int assoc; - - if (L2C_310->AUX_CNT & (1U << 16U)) { - assoc = 16U; - } else { - assoc = 8U; - } - - L2C_310->INV_WAY = (1U << assoc) - 1U; - while(L2C_310->INV_WAY & ((1U << assoc) - 1U)); //poll invalidate - - L2C_Sync(); -} - -/** \brief Clean and Invalidate all cache by way -*/ -__STATIC_INLINE void L2C_CleanInvAllByWay (void) -{ - unsigned int assoc; - - if (L2C_310->AUX_CNT & (1U << 16U)) { - assoc = 16U; - } else { - assoc = 8U; - } - - L2C_310->CLEAN_INV_WAY = (1U << assoc) - 1U; - while(L2C_310->CLEAN_INV_WAY & ((1U << assoc) - 1U)); //poll invalidate - - L2C_Sync(); -} - -/** \brief Enable Level 2 Cache -*/ -__STATIC_INLINE void L2C_Enable(void) -{ - L2C_310->CONTROL = 0; - L2C_310->INTERRUPT_CLEAR = 0x000001FFuL; - L2C_310->DEBUG_CONTROL = 0; - L2C_310->DATA_LOCK_0_WAY = 0; - L2C_310->CACHE_SYNC = 0; - L2C_310->CONTROL = 0x01; - L2C_Sync(); -} - -/** \brief Disable Level 2 Cache -*/ -__STATIC_INLINE void L2C_Disable(void) -{ - L2C_310->CONTROL = 0x00; - L2C_Sync(); -} - -/** \brief Invalidate cache by physical address -* \param [in] pa Pointer to data to invalidate cache for. -*/ -__STATIC_INLINE void L2C_InvPa (void *pa) -{ - L2C_310->INV_LINE_PA = (unsigned int)pa; - L2C_Sync(); -} - -/** \brief Clean cache by physical address -* \param [in] pa Pointer to data to invalidate cache for. -*/ -__STATIC_INLINE void L2C_CleanPa (void *pa) -{ - L2C_310->CLEAN_LINE_PA = (unsigned int)pa; - L2C_Sync(); -} - -/** \brief Clean and invalidate cache by physical address -* \param [in] pa Pointer to data to invalidate cache for. -*/ -__STATIC_INLINE void L2C_CleanInvPa (void *pa) -{ - L2C_310->CLEAN_INV_LINE_PA = (unsigned int)pa; - L2C_Sync(); -} -#endif - -/* ########################## GIC functions ###################################### */ -#if (__GIC_PRESENT == 1U) || defined(DOXYGEN) - -/** \brief Enable the interrupt distributor using the GIC's CTLR register. -*/ -__STATIC_INLINE void GIC_EnableDistributor(void) -{ - GICDistributor->CTLR |= 1U; -} - -/** \brief Disable the interrupt distributor using the GIC's CTLR register. -*/ -__STATIC_INLINE void GIC_DisableDistributor(void) -{ - GICDistributor->CTLR &=~1U; -} - -/** \brief Read the GIC's TYPER register. -* \return GICDistributor_Type::TYPER -*/ -__STATIC_INLINE uint32_t GIC_DistributorInfo(void) -{ - return (GICDistributor->TYPER); -} - -/** \brief Reads the GIC's IIDR register. -* \return GICDistributor_Type::IIDR -*/ -__STATIC_INLINE uint32_t GIC_DistributorImplementer(void) -{ - return (GICDistributor->IIDR); -} - -/** \brief Sets the GIC's ITARGETSR register for the given interrupt. -* \param [in] IRQn Interrupt to be configured. -* \param [in] cpu_target CPU interfaces to assign this interrupt to. -*/ -__STATIC_INLINE void GIC_SetTarget(IRQn_Type IRQn, uint32_t cpu_target) -{ - uint32_t mask = GICDistributor->ITARGETSR[IRQn / 4U] & ~(0xFFUL << ((IRQn % 4U) * 8U)); - GICDistributor->ITARGETSR[IRQn / 4U] = mask | ((cpu_target & 0xFFUL) << ((IRQn % 4U) * 8U)); -} - -/** \brief Read the GIC's ITARGETSR register. -* \param [in] IRQn Interrupt to acquire the configuration for. -* \return GICDistributor_Type::ITARGETSR -*/ -__STATIC_INLINE uint32_t GIC_GetTarget(IRQn_Type IRQn) -{ - return (GICDistributor->ITARGETSR[IRQn / 4U] >> ((IRQn % 4U) * 8U)) & 0xFFUL; -} - -/** \brief Enable the CPU's interrupt interface. -*/ -__STATIC_INLINE void GIC_EnableInterface(void) -{ - GICInterface->CTLR |= 1U; //enable interface -} - -/** \brief Disable the CPU's interrupt interface. -*/ -__STATIC_INLINE void GIC_DisableInterface(void) -{ - GICInterface->CTLR &=~1U; //disable distributor -} - -/** \brief Read the CPU's IAR register. -* \return GICInterface_Type::IAR -*/ -__STATIC_INLINE IRQn_Type GIC_AcknowledgePending(void) -{ - return (IRQn_Type)(GICInterface->IAR); -} - -/** \brief Writes the given interrupt number to the CPU's EOIR register. -* \param [in] IRQn The interrupt to be signaled as finished. -*/ -__STATIC_INLINE void GIC_EndInterrupt(IRQn_Type IRQn) -{ - GICInterface->EOIR = IRQn; -} - -/** \brief Enables the given interrupt using GIC's ISENABLER register. -* \param [in] IRQn The interrupt to be enabled. -*/ -__STATIC_INLINE void GIC_EnableIRQ(IRQn_Type IRQn) -{ - GICDistributor->ISENABLER[IRQn / 32U] = 1U << (IRQn % 32U); -} - -/** \brief Get interrupt enable status using GIC's ISENABLER register. -* \param [in] IRQn The interrupt to be queried. -* \return 0 - interrupt is not enabled, 1 - interrupt is enabled. -*/ -__STATIC_INLINE uint32_t GIC_GetEnableIRQ(IRQn_Type IRQn) -{ - return (GICDistributor->ISENABLER[IRQn / 32U] >> (IRQn % 32U)) & 1UL; -} - -/** \brief Disables the given interrupt using GIC's ICENABLER register. -* \param [in] IRQn The interrupt to be disabled. -*/ -__STATIC_INLINE void GIC_DisableIRQ(IRQn_Type IRQn) -{ - GICDistributor->ICENABLER[IRQn / 32U] = 1U << (IRQn % 32U); -} - -/** \brief Get interrupt pending status from GIC's ISPENDR register. -* \param [in] IRQn The interrupt to be queried. -* \return 0 - interrupt is not pending, 1 - interrupt is pendig. -*/ -__STATIC_INLINE uint32_t GIC_GetPendingIRQ(IRQn_Type IRQn) -{ - uint32_t pend; - - if (IRQn >= 16U) { - pend = (GICDistributor->ISPENDR[IRQn / 32U] >> (IRQn % 32U)) & 1UL; - } else { - // INTID 0-15 Software Generated Interrupt - pend = (GICDistributor->SPENDSGIR[IRQn / 4U] >> ((IRQn % 4U) * 8U)) & 0xFFUL; - // No CPU identification offered - if (pend != 0U) { - pend = 1U; - } else { - pend = 0U; - } - } - - return (pend); -} - -/** \brief Sets the given interrupt as pending using GIC's ISPENDR register. -* \param [in] IRQn The interrupt to be enabled. -*/ -__STATIC_INLINE void GIC_SetPendingIRQ(IRQn_Type IRQn) -{ - if (IRQn >= 16U) { - GICDistributor->ISPENDR[IRQn / 32U] = 1U << (IRQn % 32U); - } else { - // INTID 0-15 Software Generated Interrupt - // Forward the interrupt to the CPU interface that requested it - GICDistributor->SGIR = (IRQn | 0x02000000U); - } -} - -/** \brief Clears the given interrupt from being pending using GIC's ICPENDR register. -* \param [in] IRQn The interrupt to be enabled. -*/ -__STATIC_INLINE void GIC_ClearPendingIRQ(IRQn_Type IRQn) -{ - if (IRQn >= 16U) { - GICDistributor->ICPENDR[IRQn / 32U] = 1U << (IRQn % 32U); - } else { - // INTID 0-15 Software Generated Interrupt - GICDistributor->CPENDSGIR[IRQn / 4U] = 1U << ((IRQn % 4U) * 8U); - } -} - -/** \brief Sets the interrupt configuration using GIC's ICFGR register. -* \param [in] IRQn The interrupt to be configured. -* \param [in] int_config Int_config field value. Bit 0: Reserved (0 - N-N model, 1 - 1-N model for some GIC before v1) -* Bit 1: 0 - level sensitive, 1 - edge triggered -*/ -__STATIC_INLINE void GIC_SetConfiguration(IRQn_Type IRQn, uint32_t int_config) -{ - uint32_t icfgr = GICDistributor->ICFGR[IRQn / 16U]; - uint32_t shift = (IRQn % 16U) << 1U; - - icfgr &= (~(3U << shift)); - icfgr |= ( int_config << shift); - - GICDistributor->ICFGR[IRQn / 16U] = icfgr; -} - -/** \brief Get the interrupt configuration from the GIC's ICFGR register. -* \param [in] IRQn Interrupt to acquire the configuration for. -* \return Int_config field value. Bit 0: Reserved (0 - N-N model, 1 - 1-N model for some GIC before v1) -* Bit 1: 0 - level sensitive, 1 - edge triggered -*/ -__STATIC_INLINE uint32_t GIC_GetConfiguration(IRQn_Type IRQn) -{ - return (GICDistributor->ICFGR[IRQn / 16U] >> ((IRQn % 16U) >> 1U)); -} - -/** \brief Set the priority for the given interrupt in the GIC's IPRIORITYR register. -* \param [in] IRQn The interrupt to be configured. -* \param [in] priority The priority for the interrupt, lower values denote higher priorities. -*/ -__STATIC_INLINE void GIC_SetPriority(IRQn_Type IRQn, uint32_t priority) -{ - uint32_t mask = GICDistributor->IPRIORITYR[IRQn / 4U] & ~(0xFFUL << ((IRQn % 4U) * 8U)); - GICDistributor->IPRIORITYR[IRQn / 4U] = mask | ((priority & 0xFFUL) << ((IRQn % 4U) * 8U)); -} - -/** \brief Read the current interrupt priority from GIC's IPRIORITYR register. -* \param [in] IRQn The interrupt to be queried. -*/ -__STATIC_INLINE uint32_t GIC_GetPriority(IRQn_Type IRQn) -{ - return (GICDistributor->IPRIORITYR[IRQn / 4U] >> ((IRQn % 4U) * 8U)) & 0xFFUL; -} - -/** \brief Set the interrupt priority mask using CPU's PMR register. -* \param [in] priority Priority mask to be set. -*/ -__STATIC_INLINE void GIC_SetInterfacePriorityMask(uint32_t priority) -{ - GICInterface->PMR = priority & 0xFFUL; //set priority mask -} - -/** \brief Read the current interrupt priority mask from CPU's PMR register. -* \result GICInterface_Type::PMR -*/ -__STATIC_INLINE uint32_t GIC_GetInterfacePriorityMask(void) -{ - return GICInterface->PMR; -} - -/** \brief Configures the group priority and subpriority split point using CPU's BPR register. -* \param [in] binary_point Amount of bits used as subpriority. -*/ -__STATIC_INLINE void GIC_SetBinaryPoint(uint32_t binary_point) -{ - GICInterface->BPR = binary_point & 7U; //set binary point -} - -/** \brief Read the current group priority and subpriority split point from CPU's BPR register. -* \return GICInterface_Type::BPR -*/ -__STATIC_INLINE uint32_t GIC_GetBinaryPoint(void) -{ - return GICInterface->BPR; -} - -/** \brief Get the status for a given interrupt. -* \param [in] IRQn The interrupt to get status for. -* \return 0 - not pending/active, 1 - pending, 2 - active, 3 - pending and active -*/ -__STATIC_INLINE uint32_t GIC_GetIRQStatus(IRQn_Type IRQn) -{ - uint32_t pending, active; - - active = ((GICDistributor->ISACTIVER[IRQn / 32U]) >> (IRQn % 32U)) & 1UL; - pending = ((GICDistributor->ISPENDR[IRQn / 32U]) >> (IRQn % 32U)) & 1UL; - - return ((active<<1U) | pending); -} - -/** \brief Generate a software interrupt using GIC's SGIR register. -* \param [in] IRQn Software interrupt to be generated. -* \param [in] target_list List of CPUs the software interrupt should be forwarded to. -* \param [in] filter_list Filter to be applied to determine interrupt receivers. -*/ -__STATIC_INLINE void GIC_SendSGI(IRQn_Type IRQn, uint32_t target_list, uint32_t filter_list) -{ - GICDistributor->SGIR = ((filter_list & 3U) << 24U) | ((target_list & 0xFFUL) << 16U) | (IRQn & 0x0FUL); -} - -/** \brief Get the interrupt number of the highest interrupt pending from CPU's HPPIR register. -* \return GICInterface_Type::HPPIR -*/ -__STATIC_INLINE uint32_t GIC_GetHighPendingIRQ(void) -{ - return GICInterface->HPPIR; -} - -/** \brief Provides information about the implementer and revision of the CPU interface. -* \return GICInterface_Type::IIDR -*/ -__STATIC_INLINE uint32_t GIC_GetInterfaceId(void) -{ - return GICInterface->IIDR; -} - -/** \brief Set the interrupt group from the GIC's IGROUPR register. -* \param [in] IRQn The interrupt to be queried. -* \param [in] group Interrupt group number: 0 - Group 0, 1 - Group 1 -*/ -__STATIC_INLINE void GIC_SetGroup(IRQn_Type IRQn, uint32_t group) -{ - uint32_t igroupr = GICDistributor->IGROUPR[IRQn / 32U]; - uint32_t shift = (IRQn % 32U); - - igroupr &= (~(1U << shift)); - igroupr |= ( (group & 1U) << shift); - - GICDistributor->IGROUPR[IRQn / 32U] = igroupr; -} -#define GIC_SetSecurity GIC_SetGroup - -/** \brief Get the interrupt group from the GIC's IGROUPR register. -* \param [in] IRQn The interrupt to be queried. -* \return 0 - Group 0, 1 - Group 1 -*/ -__STATIC_INLINE uint32_t GIC_GetGroup(IRQn_Type IRQn) -{ - return (GICDistributor->IGROUPR[IRQn / 32U] >> (IRQn % 32U)) & 1UL; -} -#define GIC_GetSecurity GIC_GetGroup - -/** \brief Initialize the interrupt distributor. -*/ -__STATIC_INLINE void GIC_DistInit(void) -{ - uint32_t i; - uint32_t num_irq = 0U; - uint32_t priority_field; - - //A reset sets all bits in the IGROUPRs corresponding to the SPIs to 0, - //configuring all of the interrupts as Secure. - - //Disable interrupt forwarding - GIC_DisableDistributor(); - //Get the maximum number of interrupts that the GIC supports - num_irq = 32U * ((GIC_DistributorInfo() & 0x1FU) + 1U); - - /* Priority level is implementation defined. - To determine the number of priority bits implemented write 0xFF to an IPRIORITYR - priority field and read back the value stored.*/ - GIC_SetPriority((IRQn_Type)0U, 0xFFU); - priority_field = GIC_GetPriority((IRQn_Type)0U); - - for (i = 32U; i < num_irq; i++) - { - //Disable the SPI interrupt - GIC_DisableIRQ((IRQn_Type)i); - //Set level-sensitive (and N-N model) - GIC_SetConfiguration((IRQn_Type)i, 0U); - //Set priority - GIC_SetPriority((IRQn_Type)i, priority_field/2U); - //Set target list to CPU0 - GIC_SetTarget((IRQn_Type)i, 1U); - } - //Enable distributor - GIC_EnableDistributor(); -} - -/** \brief Initialize the CPU's interrupt interface -*/ -__STATIC_INLINE void GIC_CPUInterfaceInit(void) -{ - uint32_t i; - uint32_t priority_field; - - //A reset sets all bits in the IGROUPRs corresponding to the SPIs to 0, - //configuring all of the interrupts as Secure. - - //Disable interrupt forwarding - GIC_DisableInterface(); - - /* Priority level is implementation defined. - To determine the number of priority bits implemented write 0xFF to an IPRIORITYR - priority field and read back the value stored.*/ - GIC_SetPriority((IRQn_Type)0U, 0xFFU); - priority_field = GIC_GetPriority((IRQn_Type)0U); - - //SGI and PPI - for (i = 0U; i < 32U; i++) - { - if(i > 15U) { - //Set level-sensitive (and N-N model) for PPI - GIC_SetConfiguration((IRQn_Type)i, 0U); - } - //Disable SGI and PPI interrupts - GIC_DisableIRQ((IRQn_Type)i); - //Set priority - GIC_SetPriority((IRQn_Type)i, priority_field/2U); - } - //Enable interface - GIC_EnableInterface(); - //Set binary point to 0 - GIC_SetBinaryPoint(0U); - //Set priority mask - GIC_SetInterfacePriorityMask(0xFFU); -} - -/** \brief Initialize and enable the GIC -*/ -__STATIC_INLINE void GIC_Enable(void) -{ - GIC_DistInit(); - GIC_CPUInterfaceInit(); //per CPU -} -#endif - -/* ########################## Generic Timer functions ############################ */ -#if (__TIM_PRESENT == 1U) || defined(DOXYGEN) - -/* PL1 Physical Timer */ -#if (__CORTEX_A == 7U) || defined(DOXYGEN) - -/** \brief Physical Timer Control register */ -typedef union -{ - struct - { - uint32_t ENABLE:1; /*!< \brief bit: 0 Enables the timer. */ - uint32_t IMASK:1; /*!< \brief bit: 1 Timer output signal mask bit. */ - uint32_t ISTATUS:1; /*!< \brief bit: 2 The status of the timer. */ - RESERVED(0:29, uint32_t) - } b; /*!< \brief Structure used for bit access */ - uint32_t w; /*!< \brief Type used for word access */ -} CNTP_CTL_Type; - -/** \brief Configures the frequency the timer shall run at. -* \param [in] value The timer frequency in Hz. -*/ -__STATIC_INLINE void PL1_SetCounterFrequency(uint32_t value) -{ - __set_CNTFRQ(value); - __ISB(); -} - -/** \brief Sets the reset value of the timer. -* \param [in] value The value the timer is loaded with. -*/ -__STATIC_INLINE void PL1_SetLoadValue(uint32_t value) -{ - __set_CNTP_TVAL(value); - __ISB(); -} - -/** \brief Get the current counter value. -* \return Current counter value. -*/ -__STATIC_INLINE uint32_t PL1_GetCurrentValue(void) -{ - return(__get_CNTP_TVAL()); -} - -/** \brief Get the current physical counter value. -* \return Current physical counter value. -*/ -__STATIC_INLINE uint64_t PL1_GetCurrentPhysicalValue(void) -{ - return(__get_CNTPCT()); -} - -/** \brief Set the physical compare value. -* \param [in] value New physical timer compare value. -*/ -__STATIC_INLINE void PL1_SetPhysicalCompareValue(uint64_t value) -{ - __set_CNTP_CVAL(value); - __ISB(); -} - -/** \brief Get the physical compare value. -* \return Physical compare value. -*/ -__STATIC_INLINE uint64_t PL1_GetPhysicalCompareValue(void) -{ - return(__get_CNTP_CVAL()); -} - -/** \brief Configure the timer by setting the control value. -* \param [in] value New timer control value. -*/ -__STATIC_INLINE void PL1_SetControl(uint32_t value) -{ - __set_CNTP_CTL(value); - __ISB(); -} - -/** \brief Get the control value. -* \return Control value. -*/ -__STATIC_INLINE uint32_t PL1_GetControl(void) -{ - return(__get_CNTP_CTL()); -} -#endif - -/* Private Timer */ -#if ((__CORTEX_A == 5U) || (__CORTEX_A == 9U)) || defined(DOXYGEN) -/** \brief Set the load value to timers LOAD register. -* \param [in] value The load value to be set. -*/ -__STATIC_INLINE void PTIM_SetLoadValue(uint32_t value) -{ - PTIM->LOAD = value; -} - -/** \brief Get the load value from timers LOAD register. -* \return Timer_Type::LOAD -*/ -__STATIC_INLINE uint32_t PTIM_GetLoadValue(void) -{ - return(PTIM->LOAD); -} - -/** \brief Set current counter value from its COUNTER register. -*/ -__STATIC_INLINE void PTIM_SetCurrentValue(uint32_t value) -{ - PTIM->COUNTER = value; -} - -/** \brief Get current counter value from timers COUNTER register. -* \result Timer_Type::COUNTER -*/ -__STATIC_INLINE uint32_t PTIM_GetCurrentValue(void) -{ - return(PTIM->COUNTER); -} - -/** \brief Configure the timer using its CONTROL register. -* \param [in] value The new configuration value to be set. -*/ -__STATIC_INLINE void PTIM_SetControl(uint32_t value) -{ - PTIM->CONTROL = value; -} - -/** ref Timer_Type::CONTROL Get the current timer configuration from its CONTROL register. -* \return Timer_Type::CONTROL -*/ -__STATIC_INLINE uint32_t PTIM_GetControl(void) -{ - return(PTIM->CONTROL); -} - -/** ref Timer_Type::CONTROL Get the event flag in timers ISR register. -* \return 0 - flag is not set, 1- flag is set -*/ -__STATIC_INLINE uint32_t PTIM_GetEventFlag(void) -{ - return (PTIM->ISR & 1UL); -} - -/** ref Timer_Type::CONTROL Clears the event flag in timers ISR register. -*/ -__STATIC_INLINE void PTIM_ClearEventFlag(void) -{ - PTIM->ISR = 1; -} -#endif -#endif - -/* ########################## MMU functions ###################################### */ - -#define SECTION_DESCRIPTOR (0x2) -#define SECTION_MASK (0xFFFFFFFC) - -#define SECTION_TEXCB_MASK (0xFFFF8FF3) -#define SECTION_B_SHIFT (2) -#define SECTION_C_SHIFT (3) -#define SECTION_TEX0_SHIFT (12) -#define SECTION_TEX1_SHIFT (13) -#define SECTION_TEX2_SHIFT (14) - -#define SECTION_XN_MASK (0xFFFFFFEF) -#define SECTION_XN_SHIFT (4) - -#define SECTION_DOMAIN_MASK (0xFFFFFE1F) -#define SECTION_DOMAIN_SHIFT (5) - -#define SECTION_P_MASK (0xFFFFFDFF) -#define SECTION_P_SHIFT (9) - -#define SECTION_AP_MASK (0xFFFF73FF) -#define SECTION_AP_SHIFT (10) -#define SECTION_AP2_SHIFT (15) - -#define SECTION_S_MASK (0xFFFEFFFF) -#define SECTION_S_SHIFT (16) - -#define SECTION_NG_MASK (0xFFFDFFFF) -#define SECTION_NG_SHIFT (17) - -#define SECTION_NS_MASK (0xFFF7FFFF) -#define SECTION_NS_SHIFT (19) - -#define PAGE_L1_DESCRIPTOR (0x1) -#define PAGE_L1_MASK (0xFFFFFFFC) - -#define PAGE_L2_4K_DESC (0x2) -#define PAGE_L2_4K_MASK (0xFFFFFFFD) - -#define PAGE_L2_64K_DESC (0x1) -#define PAGE_L2_64K_MASK (0xFFFFFFFC) - -#define PAGE_4K_TEXCB_MASK (0xFFFFFE33) -#define PAGE_4K_B_SHIFT (2) -#define PAGE_4K_C_SHIFT (3) -#define PAGE_4K_TEX0_SHIFT (6) -#define PAGE_4K_TEX1_SHIFT (7) -#define PAGE_4K_TEX2_SHIFT (8) - -#define PAGE_64K_TEXCB_MASK (0xFFFF8FF3) -#define PAGE_64K_B_SHIFT (2) -#define PAGE_64K_C_SHIFT (3) -#define PAGE_64K_TEX0_SHIFT (12) -#define PAGE_64K_TEX1_SHIFT (13) -#define PAGE_64K_TEX2_SHIFT (14) - -#define PAGE_TEXCB_MASK (0xFFFF8FF3) -#define PAGE_B_SHIFT (2) -#define PAGE_C_SHIFT (3) -#define PAGE_TEX_SHIFT (12) - -#define PAGE_XN_4K_MASK (0xFFFFFFFE) -#define PAGE_XN_4K_SHIFT (0) -#define PAGE_XN_64K_MASK (0xFFFF7FFF) -#define PAGE_XN_64K_SHIFT (15) - -#define PAGE_DOMAIN_MASK (0xFFFFFE1F) -#define PAGE_DOMAIN_SHIFT (5) - -#define PAGE_P_MASK (0xFFFFFDFF) -#define PAGE_P_SHIFT (9) - -#define PAGE_AP_MASK (0xFFFFFDCF) -#define PAGE_AP_SHIFT (4) -#define PAGE_AP2_SHIFT (9) - -#define PAGE_S_MASK (0xFFFFFBFF) -#define PAGE_S_SHIFT (10) - -#define PAGE_NG_MASK (0xFFFFF7FF) -#define PAGE_NG_SHIFT (11) - -#define PAGE_NS_MASK (0xFFFFFFF7) -#define PAGE_NS_SHIFT (3) - -#define OFFSET_1M (0x00100000) -#define OFFSET_64K (0x00010000) -#define OFFSET_4K (0x00001000) - -#define DESCRIPTOR_FAULT (0x00000000) - -/* Attributes enumerations */ - -/* Region size attributes */ -typedef enum -{ - SECTION, - PAGE_4k, - PAGE_64k, -} mmu_region_size_Type; - -/* Region type attributes */ -typedef enum -{ - NORMAL, - DEVICE, - SHARED_DEVICE, - NON_SHARED_DEVICE, - STRONGLY_ORDERED -} mmu_memory_Type; - -/* Region cacheability attributes */ -typedef enum -{ - NON_CACHEABLE, - WB_WA, - WT, - WB_NO_WA, -} mmu_cacheability_Type; - -/* Region parity check attributes */ -typedef enum -{ - ECC_DISABLED, - ECC_ENABLED, -} mmu_ecc_check_Type; - -/* Region execution attributes */ -typedef enum -{ - EXECUTE, - NON_EXECUTE, -} mmu_execute_Type; - -/* Region global attributes */ -typedef enum -{ - GLOBAL, - NON_GLOBAL, -} mmu_global_Type; - -/* Region shareability attributes */ -typedef enum -{ - NON_SHARED, - SHARED, -} mmu_shared_Type; - -/* Region security attributes */ -typedef enum -{ - SECURE, - NON_SECURE, -} mmu_secure_Type; - -/* Region access attributes */ -typedef enum -{ - NO_ACCESS, - RW, - READ, -} mmu_access_Type; - -/* Memory Region definition */ -typedef struct RegionStruct { - mmu_region_size_Type rg_t; - mmu_memory_Type mem_t; - uint8_t domain; - mmu_cacheability_Type inner_norm_t; - mmu_cacheability_Type outer_norm_t; - mmu_ecc_check_Type e_t; - mmu_execute_Type xn_t; - mmu_global_Type g_t; - mmu_secure_Type sec_t; - mmu_access_Type priv_t; - mmu_access_Type user_t; - mmu_shared_Type sh_t; - -} mmu_region_attributes_Type; - -//Following macros define the descriptors and attributes -//Sect_Normal. Outer & inner wb/wa, non-shareable, executable, rw, domain 0 -#define section_normal(descriptor_l1, region) region.rg_t = SECTION; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = WB_WA; \ - region.outer_norm_t = WB_WA; \ - region.mem_t = NORMAL; \ - region.sec_t = SECURE; \ - region.xn_t = EXECUTE; \ - region.priv_t = RW; \ - region.user_t = RW; \ - region.sh_t = NON_SHARED; \ - MMU_GetSectionDescriptor(&descriptor_l1, region); - -//Sect_Normal_NC. Outer & inner non-cacheable, non-shareable, executable, rw, domain 0 -#define section_normal_nc(descriptor_l1, region) region.rg_t = SECTION; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = NON_CACHEABLE; \ - region.outer_norm_t = NON_CACHEABLE; \ - region.mem_t = NORMAL; \ - region.sec_t = SECURE; \ - region.xn_t = EXECUTE; \ - region.priv_t = RW; \ - region.user_t = RW; \ - region.sh_t = NON_SHARED; \ - MMU_GetSectionDescriptor(&descriptor_l1, region); - -//Sect_Normal_Cod. Outer & inner wb/wa, non-shareable, executable, ro, domain 0 -#define section_normal_cod(descriptor_l1, region) region.rg_t = SECTION; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = WB_WA; \ - region.outer_norm_t = WB_WA; \ - region.mem_t = NORMAL; \ - region.sec_t = SECURE; \ - region.xn_t = EXECUTE; \ - region.priv_t = READ; \ - region.user_t = READ; \ - region.sh_t = NON_SHARED; \ - MMU_GetSectionDescriptor(&descriptor_l1, region); - -//Sect_Normal_RO. Sect_Normal_Cod, but not executable -#define section_normal_ro(descriptor_l1, region) region.rg_t = SECTION; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = WB_WA; \ - region.outer_norm_t = WB_WA; \ - region.mem_t = NORMAL; \ - region.sec_t = SECURE; \ - region.xn_t = NON_EXECUTE; \ - region.priv_t = READ; \ - region.user_t = READ; \ - region.sh_t = NON_SHARED; \ - MMU_GetSectionDescriptor(&descriptor_l1, region); - -//Sect_Normal_RW. Sect_Normal_Cod, but writeable and not executable -#define section_normal_rw(descriptor_l1, region) region.rg_t = SECTION; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = WB_WA; \ - region.outer_norm_t = WB_WA; \ - region.mem_t = NORMAL; \ - region.sec_t = SECURE; \ - region.xn_t = NON_EXECUTE; \ - region.priv_t = RW; \ - region.user_t = RW; \ - region.sh_t = NON_SHARED; \ - MMU_GetSectionDescriptor(&descriptor_l1, region); -//Sect_SO. Strongly-ordered (therefore shareable), not executable, rw, domain 0, base addr 0 -#define section_so(descriptor_l1, region) region.rg_t = SECTION; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = NON_CACHEABLE; \ - region.outer_norm_t = NON_CACHEABLE; \ - region.mem_t = STRONGLY_ORDERED; \ - region.sec_t = SECURE; \ - region.xn_t = NON_EXECUTE; \ - region.priv_t = RW; \ - region.user_t = RW; \ - region.sh_t = NON_SHARED; \ - MMU_GetSectionDescriptor(&descriptor_l1, region); - -//Sect_Device_RO. Device, non-shareable, non-executable, ro, domain 0, base addr 0 -#define section_device_ro(descriptor_l1, region) region.rg_t = SECTION; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = NON_CACHEABLE; \ - region.outer_norm_t = NON_CACHEABLE; \ - region.mem_t = STRONGLY_ORDERED; \ - region.sec_t = SECURE; \ - region.xn_t = NON_EXECUTE; \ - region.priv_t = READ; \ - region.user_t = READ; \ - region.sh_t = NON_SHARED; \ - MMU_GetSectionDescriptor(&descriptor_l1, region); - -//Sect_Device_RW. Sect_Device_RO, but writeable -#define section_device_rw(descriptor_l1, region) region.rg_t = SECTION; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = NON_CACHEABLE; \ - region.outer_norm_t = NON_CACHEABLE; \ - region.mem_t = STRONGLY_ORDERED; \ - region.sec_t = SECURE; \ - region.xn_t = NON_EXECUTE; \ - region.priv_t = RW; \ - region.user_t = RW; \ - region.sh_t = NON_SHARED; \ - MMU_GetSectionDescriptor(&descriptor_l1, region); -//Page_4k_Device_RW. Shared device, not executable, rw, domain 0 -#define page4k_device_rw(descriptor_l1, descriptor_l2, region) region.rg_t = PAGE_4k; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = NON_CACHEABLE; \ - region.outer_norm_t = NON_CACHEABLE; \ - region.mem_t = SHARED_DEVICE; \ - region.sec_t = SECURE; \ - region.xn_t = NON_EXECUTE; \ - region.priv_t = RW; \ - region.user_t = RW; \ - region.sh_t = NON_SHARED; \ - MMU_GetPageDescriptor(&descriptor_l1, &descriptor_l2, region); - -//Page_64k_Device_RW. Shared device, not executable, rw, domain 0 -#define page64k_device_rw(descriptor_l1, descriptor_l2, region) region.rg_t = PAGE_64k; \ - region.domain = 0x0; \ - region.e_t = ECC_DISABLED; \ - region.g_t = GLOBAL; \ - region.inner_norm_t = NON_CACHEABLE; \ - region.outer_norm_t = NON_CACHEABLE; \ - region.mem_t = SHARED_DEVICE; \ - region.sec_t = SECURE; \ - region.xn_t = NON_EXECUTE; \ - region.priv_t = RW; \ - region.user_t = RW; \ - region.sh_t = NON_SHARED; \ - MMU_GetPageDescriptor(&descriptor_l1, &descriptor_l2, region); - -/** \brief Set section execution-never attribute - - \param [out] descriptor_l1 L1 descriptor. - \param [in] xn Section execution-never attribute : EXECUTE , NON_EXECUTE. - - \return 0 -*/ -__STATIC_INLINE int MMU_XNSection(uint32_t *descriptor_l1, mmu_execute_Type xn) -{ - *descriptor_l1 &= SECTION_XN_MASK; - *descriptor_l1 |= ((xn & 0x1) << SECTION_XN_SHIFT); - return 0; -} - -/** \brief Set section domain - - \param [out] descriptor_l1 L1 descriptor. - \param [in] domain Section domain - - \return 0 -*/ -__STATIC_INLINE int MMU_DomainSection(uint32_t *descriptor_l1, uint8_t domain) -{ - *descriptor_l1 &= SECTION_DOMAIN_MASK; - *descriptor_l1 |= ((domain & 0xF) << SECTION_DOMAIN_SHIFT); - return 0; -} - -/** \brief Set section parity check - - \param [out] descriptor_l1 L1 descriptor. - \param [in] p_bit Parity check: ECC_DISABLED, ECC_ENABLED - - \return 0 -*/ -__STATIC_INLINE int MMU_PSection(uint32_t *descriptor_l1, mmu_ecc_check_Type p_bit) -{ - *descriptor_l1 &= SECTION_P_MASK; - *descriptor_l1 |= ((p_bit & 0x1) << SECTION_P_SHIFT); - return 0; -} - -/** \brief Set section access privileges - - \param [out] descriptor_l1 L1 descriptor. - \param [in] user User Level Access: NO_ACCESS, RW, READ - \param [in] priv Privilege Level Access: NO_ACCESS, RW, READ - \param [in] afe Access flag enable - - \return 0 -*/ -__STATIC_INLINE int MMU_APSection(uint32_t *descriptor_l1, mmu_access_Type user, mmu_access_Type priv, uint32_t afe) -{ - uint32_t ap = 0; - - if (afe == 0) { //full access - if ((priv == NO_ACCESS) && (user == NO_ACCESS)) { ap = 0x0; } - else if ((priv == RW) && (user == NO_ACCESS)) { ap = 0x1; } - else if ((priv == RW) && (user == READ)) { ap = 0x2; } - else if ((priv == RW) && (user == RW)) { ap = 0x3; } - else if ((priv == READ) && (user == NO_ACCESS)) { ap = 0x5; } - else if ((priv == READ) && (user == READ)) { ap = 0x7; } - } - - else { //Simplified access - if ((priv == RW) && (user == NO_ACCESS)) { ap = 0x1; } - else if ((priv == RW) && (user == RW)) { ap = 0x3; } - else if ((priv == READ) && (user == NO_ACCESS)) { ap = 0x5; } - else if ((priv == READ) && (user == READ)) { ap = 0x7; } - } - - *descriptor_l1 &= SECTION_AP_MASK; - *descriptor_l1 |= (ap & 0x3) << SECTION_AP_SHIFT; - *descriptor_l1 |= ((ap & 0x4)>>2) << SECTION_AP2_SHIFT; - - return 0; -} - -/** \brief Set section shareability - - \param [out] descriptor_l1 L1 descriptor. - \param [in] s_bit Section shareability: NON_SHARED, SHARED - - \return 0 -*/ -__STATIC_INLINE int MMU_SharedSection(uint32_t *descriptor_l1, mmu_shared_Type s_bit) -{ - *descriptor_l1 &= SECTION_S_MASK; - *descriptor_l1 |= ((s_bit & 0x1) << SECTION_S_SHIFT); - return 0; -} - -/** \brief Set section Global attribute - - \param [out] descriptor_l1 L1 descriptor. - \param [in] g_bit Section attribute: GLOBAL, NON_GLOBAL - - \return 0 -*/ -__STATIC_INLINE int MMU_GlobalSection(uint32_t *descriptor_l1, mmu_global_Type g_bit) -{ - *descriptor_l1 &= SECTION_NG_MASK; - *descriptor_l1 |= ((g_bit & 0x1) << SECTION_NG_SHIFT); - return 0; -} - -/** \brief Set section Security attribute - - \param [out] descriptor_l1 L1 descriptor. - \param [in] s_bit Section Security attribute: SECURE, NON_SECURE - - \return 0 -*/ -__STATIC_INLINE int MMU_SecureSection(uint32_t *descriptor_l1, mmu_secure_Type s_bit) -{ - *descriptor_l1 &= SECTION_NS_MASK; - *descriptor_l1 |= ((s_bit & 0x1) << SECTION_NS_SHIFT); - return 0; -} - -/* Page 4k or 64k */ -/** \brief Set 4k/64k page execution-never attribute - - \param [out] descriptor_l2 L2 descriptor. - \param [in] xn Page execution-never attribute : EXECUTE , NON_EXECUTE. - \param [in] page Page size: PAGE_4k, PAGE_64k, - - \return 0 -*/ -__STATIC_INLINE int MMU_XNPage(uint32_t *descriptor_l2, mmu_execute_Type xn, mmu_region_size_Type page) -{ - if (page == PAGE_4k) - { - *descriptor_l2 &= PAGE_XN_4K_MASK; - *descriptor_l2 |= ((xn & 0x1) << PAGE_XN_4K_SHIFT); - } - else - { - *descriptor_l2 &= PAGE_XN_64K_MASK; - *descriptor_l2 |= ((xn & 0x1) << PAGE_XN_64K_SHIFT); - } - return 0; -} - -/** \brief Set 4k/64k page domain - - \param [out] descriptor_l1 L1 descriptor. - \param [in] domain Page domain - - \return 0 -*/ -__STATIC_INLINE int MMU_DomainPage(uint32_t *descriptor_l1, uint8_t domain) -{ - *descriptor_l1 &= PAGE_DOMAIN_MASK; - *descriptor_l1 |= ((domain & 0xf) << PAGE_DOMAIN_SHIFT); - return 0; -} - -/** \brief Set 4k/64k page parity check - - \param [out] descriptor_l1 L1 descriptor. - \param [in] p_bit Parity check: ECC_DISABLED, ECC_ENABLED - - \return 0 -*/ -__STATIC_INLINE int MMU_PPage(uint32_t *descriptor_l1, mmu_ecc_check_Type p_bit) -{ - *descriptor_l1 &= SECTION_P_MASK; - *descriptor_l1 |= ((p_bit & 0x1) << SECTION_P_SHIFT); - return 0; -} - -/** \brief Set 4k/64k page access privileges - - \param [out] descriptor_l2 L2 descriptor. - \param [in] user User Level Access: NO_ACCESS, RW, READ - \param [in] priv Privilege Level Access: NO_ACCESS, RW, READ - \param [in] afe Access flag enable - - \return 0 -*/ -__STATIC_INLINE int MMU_APPage(uint32_t *descriptor_l2, mmu_access_Type user, mmu_access_Type priv, uint32_t afe) -{ - uint32_t ap = 0; - - if (afe == 0) { //full access - if ((priv == NO_ACCESS) && (user == NO_ACCESS)) { ap = 0x0; } - else if ((priv == RW) && (user == NO_ACCESS)) { ap = 0x1; } - else if ((priv == RW) && (user == READ)) { ap = 0x2; } - else if ((priv == RW) && (user == RW)) { ap = 0x3; } - else if ((priv == READ) && (user == NO_ACCESS)) { ap = 0x5; } - else if ((priv == READ) && (user == READ)) { ap = 0x6; } - } - - else { //Simplified access - if ((priv == RW) && (user == NO_ACCESS)) { ap = 0x1; } - else if ((priv == RW) && (user == RW)) { ap = 0x3; } - else if ((priv == READ) && (user == NO_ACCESS)) { ap = 0x5; } - else if ((priv == READ) && (user == READ)) { ap = 0x7; } - } - - *descriptor_l2 &= PAGE_AP_MASK; - *descriptor_l2 |= (ap & 0x3) << PAGE_AP_SHIFT; - *descriptor_l2 |= ((ap & 0x4)>>2) << PAGE_AP2_SHIFT; - - return 0; -} - -/** \brief Set 4k/64k page shareability - - \param [out] descriptor_l2 L2 descriptor. - \param [in] s_bit 4k/64k page shareability: NON_SHARED, SHARED - - \return 0 -*/ -__STATIC_INLINE int MMU_SharedPage(uint32_t *descriptor_l2, mmu_shared_Type s_bit) -{ - *descriptor_l2 &= PAGE_S_MASK; - *descriptor_l2 |= ((s_bit & 0x1) << PAGE_S_SHIFT); - return 0; -} - -/** \brief Set 4k/64k page Global attribute - - \param [out] descriptor_l2 L2 descriptor. - \param [in] g_bit 4k/64k page attribute: GLOBAL, NON_GLOBAL - - \return 0 -*/ -__STATIC_INLINE int MMU_GlobalPage(uint32_t *descriptor_l2, mmu_global_Type g_bit) -{ - *descriptor_l2 &= PAGE_NG_MASK; - *descriptor_l2 |= ((g_bit & 0x1) << PAGE_NG_SHIFT); - return 0; -} - -/** \brief Set 4k/64k page Security attribute - - \param [out] descriptor_l1 L1 descriptor. - \param [in] s_bit 4k/64k page Security attribute: SECURE, NON_SECURE - - \return 0 -*/ -__STATIC_INLINE int MMU_SecurePage(uint32_t *descriptor_l1, mmu_secure_Type s_bit) -{ - *descriptor_l1 &= PAGE_NS_MASK; - *descriptor_l1 |= ((s_bit & 0x1) << PAGE_NS_SHIFT); - return 0; -} - -/** \brief Set Section memory attributes - - \param [out] descriptor_l1 L1 descriptor. - \param [in] mem Section memory type: NORMAL, DEVICE, SHARED_DEVICE, NON_SHARED_DEVICE, STRONGLY_ORDERED - \param [in] outer Outer cacheability: NON_CACHEABLE, WB_WA, WT, WB_NO_WA, - \param [in] inner Inner cacheability: NON_CACHEABLE, WB_WA, WT, WB_NO_WA, - - \return 0 -*/ -__STATIC_INLINE int MMU_MemorySection(uint32_t *descriptor_l1, mmu_memory_Type mem, mmu_cacheability_Type outer, mmu_cacheability_Type inner) -{ - *descriptor_l1 &= SECTION_TEXCB_MASK; - - if (STRONGLY_ORDERED == mem) - { - return 0; - } - else if (SHARED_DEVICE == mem) - { - *descriptor_l1 |= (1 << SECTION_B_SHIFT); - } - else if (NON_SHARED_DEVICE == mem) - { - *descriptor_l1 |= (1 << SECTION_TEX1_SHIFT); - } - else if (NORMAL == mem) - { - *descriptor_l1 |= 1 << SECTION_TEX2_SHIFT; - switch(inner) - { - case NON_CACHEABLE: - break; - case WB_WA: - *descriptor_l1 |= (1 << SECTION_B_SHIFT); - break; - case WT: - *descriptor_l1 |= 1 << SECTION_C_SHIFT; - break; - case WB_NO_WA: - *descriptor_l1 |= (1 << SECTION_B_SHIFT) | (1 << SECTION_C_SHIFT); - break; - } - switch(outer) - { - case NON_CACHEABLE: - break; - case WB_WA: - *descriptor_l1 |= (1 << SECTION_TEX0_SHIFT); - break; - case WT: - *descriptor_l1 |= 1 << SECTION_TEX1_SHIFT; - break; - case WB_NO_WA: - *descriptor_l1 |= (1 << SECTION_TEX0_SHIFT) | (1 << SECTION_TEX0_SHIFT); - break; - } - } - return 0; -} - -/** \brief Set 4k/64k page memory attributes - - \param [out] descriptor_l2 L2 descriptor. - \param [in] mem 4k/64k page memory type: NORMAL, DEVICE, SHARED_DEVICE, NON_SHARED_DEVICE, STRONGLY_ORDERED - \param [in] outer Outer cacheability: NON_CACHEABLE, WB_WA, WT, WB_NO_WA, - \param [in] inner Inner cacheability: NON_CACHEABLE, WB_WA, WT, WB_NO_WA, - \param [in] page Page size - - \return 0 -*/ -__STATIC_INLINE int MMU_MemoryPage(uint32_t *descriptor_l2, mmu_memory_Type mem, mmu_cacheability_Type outer, mmu_cacheability_Type inner, mmu_region_size_Type page) -{ - *descriptor_l2 &= PAGE_4K_TEXCB_MASK; - - if (page == PAGE_64k) - { - //same as section - MMU_MemorySection(descriptor_l2, mem, outer, inner); - } - else - { - if (STRONGLY_ORDERED == mem) - { - return 0; - } - else if (SHARED_DEVICE == mem) - { - *descriptor_l2 |= (1 << PAGE_4K_B_SHIFT); - } - else if (NON_SHARED_DEVICE == mem) - { - *descriptor_l2 |= (1 << PAGE_4K_TEX1_SHIFT); - } - else if (NORMAL == mem) - { - *descriptor_l2 |= 1 << PAGE_4K_TEX2_SHIFT; - switch(inner) - { - case NON_CACHEABLE: - break; - case WB_WA: - *descriptor_l2 |= (1 << PAGE_4K_B_SHIFT); - break; - case WT: - *descriptor_l2 |= 1 << PAGE_4K_C_SHIFT; - break; - case WB_NO_WA: - *descriptor_l2 |= (1 << PAGE_4K_B_SHIFT) | (1 << PAGE_4K_C_SHIFT); - break; - } - switch(outer) - { - case NON_CACHEABLE: - break; - case WB_WA: - *descriptor_l2 |= (1 << PAGE_4K_TEX0_SHIFT); - break; - case WT: - *descriptor_l2 |= 1 << PAGE_4K_TEX1_SHIFT; - break; - case WB_NO_WA: - *descriptor_l2 |= (1 << PAGE_4K_TEX0_SHIFT) | (1 << PAGE_4K_TEX0_SHIFT); - break; - } - } - } - - return 0; -} - -/** \brief Create a L1 section descriptor - - \param [out] descriptor L1 descriptor - \param [in] reg Section attributes - - \return 0 -*/ -__STATIC_INLINE int MMU_GetSectionDescriptor(uint32_t *descriptor, mmu_region_attributes_Type reg) -{ - *descriptor = 0; - - MMU_MemorySection(descriptor, reg.mem_t, reg.outer_norm_t, reg.inner_norm_t); - MMU_XNSection(descriptor,reg.xn_t); - MMU_DomainSection(descriptor, reg.domain); - MMU_PSection(descriptor, reg.e_t); - MMU_APSection(descriptor, reg.priv_t, reg.user_t, 1); - MMU_SharedSection(descriptor,reg.sh_t); - MMU_GlobalSection(descriptor,reg.g_t); - MMU_SecureSection(descriptor,reg.sec_t); - *descriptor &= SECTION_MASK; - *descriptor |= SECTION_DESCRIPTOR; - - return 0; -} - - -/** \brief Create a L1 and L2 4k/64k page descriptor - - \param [out] descriptor L1 descriptor - \param [out] descriptor2 L2 descriptor - \param [in] reg 4k/64k page attributes - - \return 0 -*/ -__STATIC_INLINE int MMU_GetPageDescriptor(uint32_t *descriptor, uint32_t *descriptor2, mmu_region_attributes_Type reg) -{ - *descriptor = 0; - *descriptor2 = 0; - - switch (reg.rg_t) - { - case PAGE_4k: - MMU_MemoryPage(descriptor2, reg.mem_t, reg.outer_norm_t, reg.inner_norm_t, PAGE_4k); - MMU_XNPage(descriptor2, reg.xn_t, PAGE_4k); - MMU_DomainPage(descriptor, reg.domain); - MMU_PPage(descriptor, reg.e_t); - MMU_APPage(descriptor2, reg.priv_t, reg.user_t, 1); - MMU_SharedPage(descriptor2,reg.sh_t); - MMU_GlobalPage(descriptor2,reg.g_t); - MMU_SecurePage(descriptor,reg.sec_t); - *descriptor &= PAGE_L1_MASK; - *descriptor |= PAGE_L1_DESCRIPTOR; - *descriptor2 &= PAGE_L2_4K_MASK; - *descriptor2 |= PAGE_L2_4K_DESC; - break; - - case PAGE_64k: - MMU_MemoryPage(descriptor2, reg.mem_t, reg.outer_norm_t, reg.inner_norm_t, PAGE_64k); - MMU_XNPage(descriptor2, reg.xn_t, PAGE_64k); - MMU_DomainPage(descriptor, reg.domain); - MMU_PPage(descriptor, reg.e_t); - MMU_APPage(descriptor2, reg.priv_t, reg.user_t, 1); - MMU_SharedPage(descriptor2,reg.sh_t); - MMU_GlobalPage(descriptor2,reg.g_t); - MMU_SecurePage(descriptor,reg.sec_t); - *descriptor &= PAGE_L1_MASK; - *descriptor |= PAGE_L1_DESCRIPTOR; - *descriptor2 &= PAGE_L2_64K_MASK; - *descriptor2 |= PAGE_L2_64K_DESC; - break; - - case SECTION: - //error - break; - } - - return 0; -} - -/** \brief Create a 1MB Section - - \param [in] ttb Translation table base address - \param [in] base_address Section base address - \param [in] count Number of sections to create - \param [in] descriptor_l1 L1 descriptor (region attributes) - -*/ -__STATIC_INLINE void MMU_TTSection(uint32_t *ttb, uint32_t base_address, uint32_t count, uint32_t descriptor_l1) -{ - uint32_t offset; - uint32_t entry; - uint32_t i; - - offset = base_address >> 20; - entry = (base_address & 0xFFF00000) | descriptor_l1; - - //4 bytes aligned - ttb = ttb + offset; - - for (i = 0; i < count; i++ ) - { - //4 bytes aligned - *ttb++ = entry; - entry += OFFSET_1M; - } -} - -/** \brief Create a 4k page entry - - \param [in] ttb L1 table base address - \param [in] base_address 4k base address - \param [in] count Number of 4k pages to create - \param [in] descriptor_l1 L1 descriptor (region attributes) - \param [in] ttb_l2 L2 table base address - \param [in] descriptor_l2 L2 descriptor (region attributes) - -*/ -__STATIC_INLINE void MMU_TTPage4k(uint32_t *ttb, uint32_t base_address, uint32_t count, uint32_t descriptor_l1, uint32_t *ttb_l2, uint32_t descriptor_l2 ) -{ - - uint32_t offset, offset2; - uint32_t entry, entry2; - uint32_t i; - - offset = base_address >> 20; - entry = ((int)ttb_l2 & 0xFFFFFC00) | descriptor_l1; - - //4 bytes aligned - ttb += offset; - //create l1_entry - *ttb = entry; - - offset2 = (base_address & 0xff000) >> 12; - ttb_l2 += offset2; - entry2 = (base_address & 0xFFFFF000) | descriptor_l2; - for (i = 0; i < count; i++ ) - { - //4 bytes aligned - *ttb_l2++ = entry2; - entry2 += OFFSET_4K; - } -} - -/** \brief Create a 64k page entry - - \param [in] ttb L1 table base address - \param [in] base_address 64k base address - \param [in] count Number of 64k pages to create - \param [in] descriptor_l1 L1 descriptor (region attributes) - \param [in] ttb_l2 L2 table base address - \param [in] descriptor_l2 L2 descriptor (region attributes) - -*/ -__STATIC_INLINE void MMU_TTPage64k(uint32_t *ttb, uint32_t base_address, uint32_t count, uint32_t descriptor_l1, uint32_t *ttb_l2, uint32_t descriptor_l2 ) -{ - uint32_t offset, offset2; - uint32_t entry, entry2; - uint32_t i,j; - - - offset = base_address >> 20; - entry = ((int)ttb_l2 & 0xFFFFFC00) | descriptor_l1; - - //4 bytes aligned - ttb += offset; - //create l1_entry - *ttb = entry; - - offset2 = (base_address & 0xff000) >> 12; - ttb_l2 += offset2; - entry2 = (base_address & 0xFFFF0000) | descriptor_l2; - for (i = 0; i < count; i++ ) - { - //create 16 entries - for (j = 0; j < 16; j++) - { - //4 bytes aligned - *ttb_l2++ = entry2; - } - entry2 += OFFSET_64K; - } -} - -/** \brief Enable MMU -*/ -__STATIC_INLINE void MMU_Enable(void) -{ - // Set M bit 0 to enable the MMU - // Set AFE bit to enable simplified access permissions model - // Clear TRE bit to disable TEX remap and A bit to disable strict alignment fault checking - __set_SCTLR( (__get_SCTLR() & ~(1 << 28) & ~(1 << 1)) | 1 | (1 << 29)); - __ISB(); -} - -/** \brief Disable MMU -*/ -__STATIC_INLINE void MMU_Disable(void) -{ - // Clear M bit 0 to disable the MMU - __set_SCTLR( __get_SCTLR() & ~1); - __ISB(); -} - -/** \brief Invalidate entire unified TLB -*/ - -__STATIC_INLINE void MMU_InvalidateTLB(void) -{ - __set_TLBIALL(0); - __DSB(); //ensure completion of the invalidation - __ISB(); //ensure instruction fetch path sees new state -} - - -#ifdef __cplusplus -} -#endif - -#endif /* __CORE_CA_H_DEPENDANT */ - -#endif /* __CMSIS_GENERIC */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/irq_ctrl.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/irq_ctrl.h deleted file mode 100644 index 1ca29a2..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Include/irq_ctrl.h +++ /dev/null @@ -1,192 +0,0 @@ -/**************************************************************************//** - * @file irq_ctrl.h - * @brief Interrupt Controller API header file - * @version V1.1.0 - * @date 03. March 2020 - ******************************************************************************/ -/* - * Copyright (c) 2017-2020 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#if defined ( __ICCARM__ ) - #pragma system_include /* treat file as system include file for MISRA check */ -#elif defined (__clang__) - #pragma clang system_header /* treat file as system include file */ -#endif - -#ifndef IRQ_CTRL_H_ -#define IRQ_CTRL_H_ - -#include - -#ifndef IRQHANDLER_T -#define IRQHANDLER_T -/// Interrupt handler data type -typedef void (*IRQHandler_t) (void); -#endif - -#ifndef IRQN_ID_T -#define IRQN_ID_T -/// Interrupt ID number data type -typedef int32_t IRQn_ID_t; -#endif - -/* Interrupt mode bit-masks */ -#define IRQ_MODE_TRIG_Pos (0U) -#define IRQ_MODE_TRIG_Msk (0x07UL /*<< IRQ_MODE_TRIG_Pos*/) -#define IRQ_MODE_TRIG_LEVEL (0x00UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: level triggered interrupt -#define IRQ_MODE_TRIG_LEVEL_LOW (0x01UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: low level triggered interrupt -#define IRQ_MODE_TRIG_LEVEL_HIGH (0x02UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: high level triggered interrupt -#define IRQ_MODE_TRIG_EDGE (0x04UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: edge triggered interrupt -#define IRQ_MODE_TRIG_EDGE_RISING (0x05UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: rising edge triggered interrupt -#define IRQ_MODE_TRIG_EDGE_FALLING (0x06UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: falling edge triggered interrupt -#define IRQ_MODE_TRIG_EDGE_BOTH (0x07UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: rising and falling edge triggered interrupt - -#define IRQ_MODE_TYPE_Pos (3U) -#define IRQ_MODE_TYPE_Msk (0x01UL << IRQ_MODE_TYPE_Pos) -#define IRQ_MODE_TYPE_IRQ (0x00UL << IRQ_MODE_TYPE_Pos) ///< Type: interrupt source triggers CPU IRQ line -#define IRQ_MODE_TYPE_FIQ (0x01UL << IRQ_MODE_TYPE_Pos) ///< Type: interrupt source triggers CPU FIQ line - -#define IRQ_MODE_DOMAIN_Pos (4U) -#define IRQ_MODE_DOMAIN_Msk (0x01UL << IRQ_MODE_DOMAIN_Pos) -#define IRQ_MODE_DOMAIN_NONSECURE (0x00UL << IRQ_MODE_DOMAIN_Pos) ///< Domain: interrupt is targeting non-secure domain -#define IRQ_MODE_DOMAIN_SECURE (0x01UL << IRQ_MODE_DOMAIN_Pos) ///< Domain: interrupt is targeting secure domain - -#define IRQ_MODE_CPU_Pos (5U) -#define IRQ_MODE_CPU_Msk (0xFFUL << IRQ_MODE_CPU_Pos) -#define IRQ_MODE_CPU_ALL (0x00UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets all CPUs -#define IRQ_MODE_CPU_0 (0x01UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 0 -#define IRQ_MODE_CPU_1 (0x02UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 1 -#define IRQ_MODE_CPU_2 (0x04UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 2 -#define IRQ_MODE_CPU_3 (0x08UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 3 -#define IRQ_MODE_CPU_4 (0x10UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 4 -#define IRQ_MODE_CPU_5 (0x20UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 5 -#define IRQ_MODE_CPU_6 (0x40UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 6 -#define IRQ_MODE_CPU_7 (0x80UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 7 - -// Encoding in some early GIC implementations -#define IRQ_MODE_MODEL_Pos (13U) -#define IRQ_MODE_MODEL_Msk (0x1UL << IRQ_MODE_MODEL_Pos) -#define IRQ_MODE_MODEL_NN (0x0UL << IRQ_MODE_MODEL_Pos) ///< Corresponding interrupt is handled using the N-N model -#define IRQ_MODE_MODEL_1N (0x1UL << IRQ_MODE_MODEL_Pos) ///< Corresponding interrupt is handled using the 1-N model - -#define IRQ_MODE_ERROR (0x80000000UL) ///< Bit indicating mode value error - -/* Interrupt priority bit-masks */ -#define IRQ_PRIORITY_Msk (0x0000FFFFUL) ///< Interrupt priority value bit-mask -#define IRQ_PRIORITY_ERROR (0x80000000UL) ///< Bit indicating priority value error - -/// Initialize interrupt controller. -/// \return 0 on success, -1 on error. -int32_t IRQ_Initialize (void); - -/// Register interrupt handler. -/// \param[in] irqn interrupt ID number -/// \param[in] handler interrupt handler function address -/// \return 0 on success, -1 on error. -int32_t IRQ_SetHandler (IRQn_ID_t irqn, IRQHandler_t handler); - -/// Get the registered interrupt handler. -/// \param[in] irqn interrupt ID number -/// \return registered interrupt handler function address. -IRQHandler_t IRQ_GetHandler (IRQn_ID_t irqn); - -/// Enable interrupt. -/// \param[in] irqn interrupt ID number -/// \return 0 on success, -1 on error. -int32_t IRQ_Enable (IRQn_ID_t irqn); - -/// Disable interrupt. -/// \param[in] irqn interrupt ID number -/// \return 0 on success, -1 on error. -int32_t IRQ_Disable (IRQn_ID_t irqn); - -/// Get interrupt enable state. -/// \param[in] irqn interrupt ID number -/// \return 0 - interrupt is disabled, 1 - interrupt is enabled. -uint32_t IRQ_GetEnableState (IRQn_ID_t irqn); - -/// Configure interrupt request mode. -/// \param[in] irqn interrupt ID number -/// \param[in] mode mode configuration -/// \return 0 on success, -1 on error. -int32_t IRQ_SetMode (IRQn_ID_t irqn, uint32_t mode); - -/// Get interrupt mode configuration. -/// \param[in] irqn interrupt ID number -/// \return current interrupt mode configuration with optional IRQ_MODE_ERROR bit set. -uint32_t IRQ_GetMode (IRQn_ID_t irqn); - -/// Get ID number of current interrupt request (IRQ). -/// \return interrupt ID number. -IRQn_ID_t IRQ_GetActiveIRQ (void); - -/// Get ID number of current fast interrupt request (FIQ). -/// \return interrupt ID number. -IRQn_ID_t IRQ_GetActiveFIQ (void); - -/// Signal end of interrupt processing. -/// \param[in] irqn interrupt ID number -/// \return 0 on success, -1 on error. -int32_t IRQ_EndOfInterrupt (IRQn_ID_t irqn); - -/// Set interrupt pending flag. -/// \param[in] irqn interrupt ID number -/// \return 0 on success, -1 on error. -int32_t IRQ_SetPending (IRQn_ID_t irqn); - -/// Get interrupt pending flag. -/// \param[in] irqn interrupt ID number -/// \return 0 - interrupt is not pending, 1 - interrupt is pending. -uint32_t IRQ_GetPending (IRQn_ID_t irqn); - -/// Clear interrupt pending flag. -/// \param[in] irqn interrupt ID number -/// \return 0 on success, -1 on error. -int32_t IRQ_ClearPending (IRQn_ID_t irqn); - -/// Set interrupt priority value. -/// \param[in] irqn interrupt ID number -/// \param[in] priority interrupt priority value -/// \return 0 on success, -1 on error. -int32_t IRQ_SetPriority (IRQn_ID_t irqn, uint32_t priority); - -/// Get interrupt priority. -/// \param[in] irqn interrupt ID number -/// \return current interrupt priority value with optional IRQ_PRIORITY_ERROR bit set. -uint32_t IRQ_GetPriority (IRQn_ID_t irqn); - -/// Set priority masking threshold. -/// \param[in] priority priority masking threshold value -/// \return 0 on success, -1 on error. -int32_t IRQ_SetPriorityMask (uint32_t priority); - -/// Get priority masking threshold -/// \return current priority masking threshold value with optional IRQ_PRIORITY_ERROR bit set. -uint32_t IRQ_GetPriorityMask (void); - -/// Set priority grouping field split point -/// \param[in] bits number of MSB bits included in the group priority field comparison -/// \return 0 on success, -1 on error. -int32_t IRQ_SetPriorityGroupBits (uint32_t bits); - -/// Get priority grouping field split point -/// \return current number of MSB bits included in the group priority field comparison with -/// optional IRQ_PRIORITY_ERROR bit set. -uint32_t IRQ_GetPriorityGroupBits (void); - -#endif // IRQ_CTRL_H_ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Source/irq_ctrl_gic.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Source/irq_ctrl_gic.c deleted file mode 100644 index 15588bf..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/Core_A/Source/irq_ctrl_gic.c +++ /dev/null @@ -1,418 +0,0 @@ -/**************************************************************************//** - * @file irq_ctrl_gic.c - * @brief Interrupt controller handling implementation for GIC - * @version V1.1.1 - * @date 29. March 2021 - ******************************************************************************/ -/* - * Copyright (c) 2017-2021 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include "RTE_Components.h" -#include CMSIS_device_header - -#include "irq_ctrl.h" - -#if defined(__GIC_PRESENT) && (__GIC_PRESENT == 1U) - -/// Number of implemented interrupt lines -#ifndef IRQ_GIC_LINE_COUNT -#define IRQ_GIC_LINE_COUNT (1020U) -#endif - -static IRQHandler_t IRQTable[IRQ_GIC_LINE_COUNT] = { 0U }; -static uint32_t IRQ_ID0; - -/// Initialize interrupt controller. -__WEAK int32_t IRQ_Initialize (void) { - uint32_t i; - - for (i = 0U; i < IRQ_GIC_LINE_COUNT; i++) { - IRQTable[i] = (IRQHandler_t)NULL; - } - GIC_Enable(); - return (0); -} - - -/// Register interrupt handler. -__WEAK int32_t IRQ_SetHandler (IRQn_ID_t irqn, IRQHandler_t handler) { - int32_t status; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - IRQTable[irqn] = handler; - status = 0; - } else { - status = -1; - } - - return (status); -} - - -/// Get the registered interrupt handler. -__WEAK IRQHandler_t IRQ_GetHandler (IRQn_ID_t irqn) { - IRQHandler_t h; - - // Ignore CPUID field (software generated interrupts) - irqn &= 0x3FFU; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - h = IRQTable[irqn]; - } else { - h = (IRQHandler_t)0; - } - - return (h); -} - - -/// Enable interrupt. -__WEAK int32_t IRQ_Enable (IRQn_ID_t irqn) { - int32_t status; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - GIC_EnableIRQ ((IRQn_Type)irqn); - status = 0; - } else { - status = -1; - } - - return (status); -} - - -/// Disable interrupt. -__WEAK int32_t IRQ_Disable (IRQn_ID_t irqn) { - int32_t status; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - GIC_DisableIRQ ((IRQn_Type)irqn); - status = 0; - } else { - status = -1; - } - - return (status); -} - - -/// Get interrupt enable state. -__WEAK uint32_t IRQ_GetEnableState (IRQn_ID_t irqn) { - uint32_t enable; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - enable = GIC_GetEnableIRQ((IRQn_Type)irqn); - } else { - enable = 0U; - } - - return (enable); -} - - -/// Configure interrupt request mode. -__WEAK int32_t IRQ_SetMode (IRQn_ID_t irqn, uint32_t mode) { - uint32_t val; - uint8_t cfg; - uint8_t secure; - uint8_t cpu; - int32_t status = 0; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - // Check triggering mode - val = (mode & IRQ_MODE_TRIG_Msk); - - if (val == IRQ_MODE_TRIG_LEVEL) { - cfg = 0x00U; - } else if (val == IRQ_MODE_TRIG_EDGE) { - cfg = 0x02U; - } else { - cfg = 0x00U; - status = -1; - } - - val = (mode & IRQ_MODE_MODEL_Msk); - if (val == IRQ_MODE_MODEL_1N) { - cfg |= 1; // 1-N model - } - - // Check interrupt type - val = mode & IRQ_MODE_TYPE_Msk; - - if (val != IRQ_MODE_TYPE_IRQ) { - status = -1; - } - - // Check interrupt domain - val = mode & IRQ_MODE_DOMAIN_Msk; - - if (val == IRQ_MODE_DOMAIN_NONSECURE) { - secure = 0U; - } else { - // Check security extensions support - val = GIC_DistributorInfo() & (1UL << 10U); - - if (val != 0U) { - // Security extensions are supported - secure = 1U; - } else { - secure = 0U; - status = -1; - } - } - - // Check interrupt CPU targets - val = mode & IRQ_MODE_CPU_Msk; - - if (val == IRQ_MODE_CPU_ALL) { - cpu = 0xFFU; - } else { - cpu = (uint8_t)(val >> IRQ_MODE_CPU_Pos); - } - - // Apply configuration if no mode error - if (status == 0) { - GIC_SetConfiguration((IRQn_Type)irqn, cfg); - GIC_SetTarget ((IRQn_Type)irqn, cpu); - - if (secure != 0U) { - GIC_SetGroup ((IRQn_Type)irqn, secure); - } - } - } - - return (status); -} - - -/// Get interrupt mode configuration. -__WEAK uint32_t IRQ_GetMode (IRQn_ID_t irqn) { - uint32_t mode; - uint32_t val; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - mode = IRQ_MODE_TYPE_IRQ; - - // Get trigger mode - val = GIC_GetConfiguration((IRQn_Type)irqn); - - if ((val & 2U) != 0U) { - // Corresponding interrupt is edge triggered - mode |= IRQ_MODE_TRIG_EDGE; - } else { - // Corresponding interrupt is level triggered - mode |= IRQ_MODE_TRIG_LEVEL; - } - - if (val & 1U) { - mode |= IRQ_MODE_MODEL_1N; - } - // Get interrupt CPU targets - mode |= GIC_GetTarget ((IRQn_Type)irqn) << IRQ_MODE_CPU_Pos; - - } else { - mode = IRQ_MODE_ERROR; - } - - return (mode); -} - - -/// Get ID number of current interrupt request (IRQ). -__WEAK IRQn_ID_t IRQ_GetActiveIRQ (void) { - IRQn_ID_t irqn; - uint32_t prio; - - /* Dummy read to avoid GIC 390 errata 801120 */ - GIC_GetHighPendingIRQ(); - - irqn = GIC_AcknowledgePending(); - - __DSB(); - - /* Workaround GIC 390 errata 733075 (GIC-390_Errata_Notice_v6.pdf, 09-Jul-2014) */ - /* The following workaround code is for a single-core system. It would be */ - /* different in a multi-core system. */ - /* If the ID is 0 or 0x3FE or 0x3FF, then the GIC CPU interface may be locked-up */ - /* so unlock it, otherwise service the interrupt as normal. */ - /* Special IDs 1020=0x3FC and 1021=0x3FD are reserved values in GICv1 and GICv2 */ - /* so will not occur here. */ - - if ((irqn == 0) || (irqn >= 0x3FE)) { - /* Unlock the CPU interface with a dummy write to Interrupt Priority Register */ - prio = GIC_GetPriority((IRQn_Type)0); - GIC_SetPriority ((IRQn_Type)0, prio); - - __DSB(); - - if ((irqn == 0U) && ((GIC_GetIRQStatus ((IRQn_Type)irqn) & 1U) != 0U) && (IRQ_ID0 == 0U)) { - /* If the ID is 0, is active and has not been seen before */ - IRQ_ID0 = 1U; - } - /* End of Workaround GIC 390 errata 733075 */ - } - - return (irqn); -} - - -/// Get ID number of current fast interrupt request (FIQ). -__WEAK IRQn_ID_t IRQ_GetActiveFIQ (void) { - return ((IRQn_ID_t)-1); -} - - -/// Signal end of interrupt processing. -__WEAK int32_t IRQ_EndOfInterrupt (IRQn_ID_t irqn) { - int32_t status; - IRQn_Type irq = (IRQn_Type)irqn; - - irqn &= 0x3FFU; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - GIC_EndInterrupt (irq); - - if (irqn == 0) { - IRQ_ID0 = 0U; - } - - status = 0; - } else { - status = -1; - } - - return (status); -} - - -/// Set interrupt pending flag. -__WEAK int32_t IRQ_SetPending (IRQn_ID_t irqn) { - int32_t status; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - GIC_SetPendingIRQ ((IRQn_Type)irqn); - status = 0; - } else { - status = -1; - } - - return (status); -} - -/// Get interrupt pending flag. -__WEAK uint32_t IRQ_GetPending (IRQn_ID_t irqn) { - uint32_t pending; - - if ((irqn >= 16) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - pending = GIC_GetPendingIRQ ((IRQn_Type)irqn); - } else { - pending = 0U; - } - - return (pending & 1U); -} - - -/// Clear interrupt pending flag. -__WEAK int32_t IRQ_ClearPending (IRQn_ID_t irqn) { - int32_t status; - - if ((irqn >= 16) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - GIC_ClearPendingIRQ ((IRQn_Type)irqn); - status = 0; - } else { - status = -1; - } - - return (status); -} - - -/// Set interrupt priority value. -__WEAK int32_t IRQ_SetPriority (IRQn_ID_t irqn, uint32_t priority) { - int32_t status; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - GIC_SetPriority ((IRQn_Type)irqn, priority); - status = 0; - } else { - status = -1; - } - - return (status); -} - - -/// Get interrupt priority. -__WEAK uint32_t IRQ_GetPriority (IRQn_ID_t irqn) { - uint32_t priority; - - if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) { - priority = GIC_GetPriority ((IRQn_Type)irqn); - } else { - priority = IRQ_PRIORITY_ERROR; - } - - return (priority); -} - - -/// Set priority masking threshold. -__WEAK int32_t IRQ_SetPriorityMask (uint32_t priority) { - GIC_SetInterfacePriorityMask (priority); - return (0); -} - - -/// Get priority masking threshold -__WEAK uint32_t IRQ_GetPriorityMask (void) { - return GIC_GetInterfacePriorityMask(); -} - - -/// Set priority grouping field split point -__WEAK int32_t IRQ_SetPriorityGroupBits (uint32_t bits) { - int32_t status; - - if (bits == IRQ_PRIORITY_Msk) { - bits = 7U; - } - - if (bits < 8U) { - GIC_SetBinaryPoint (7U - bits); - status = 0; - } else { - status = -1; - } - - return (status); -} - - -/// Get priority grouping field split point -__WEAK uint32_t IRQ_GetPriorityGroupBits (void) { - uint32_t bp; - - bp = GIC_GetBinaryPoint() & 0x07U; - - return (7U - bp); -} - -#endif diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Config/DAP_config.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Config/DAP_config.h deleted file mode 100644 index 5e62cf4..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Config/DAP_config.h +++ /dev/null @@ -1,561 +0,0 @@ -/* - * Copyright (c) 2013-2021 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 16. June 2021 - * $Revision: V2.1.0 - * - * Project: CMSIS-DAP Configuration - * Title: DAP_config.h CMSIS-DAP Configuration File (Template) - * - *---------------------------------------------------------------------------*/ - -#ifndef __DAP_CONFIG_H__ -#define __DAP_CONFIG_H__ - - -//************************************************************************************************** -/** -\defgroup DAP_Config_Debug_gr CMSIS-DAP Debug Unit Information -\ingroup DAP_ConfigIO_gr -@{ -Provides definitions about the hardware and configuration of the Debug Unit. - -This information includes: - - Definition of Cortex-M processor parameters used in CMSIS-DAP Debug Unit. - - Debug Unit Identification strings (Vendor, Product, Serial Number). - - Debug Unit communication packet size. - - Debug Access Port supported modes and settings (JTAG/SWD and SWO). - - Optional information about a connected Target Device (for Evaluation Boards). -*/ - -#ifdef _RTE_ -#include "RTE_Components.h" -#include CMSIS_device_header -#else -#include "device.h" // Debug Unit Cortex-M Processor Header File -#endif - -/// Processor Clock of the Cortex-M MCU used in the Debug Unit. -/// This value is used to calculate the SWD/JTAG clock speed. -#define CPU_CLOCK 100000000U ///< Specifies the CPU Clock in Hz. - -/// Number of processor cycles for I/O Port write operations. -/// This value is used to calculate the SWD/JTAG clock speed that is generated with I/O -/// Port write operations in the Debug Unit by a Cortex-M MCU. Most Cortex-M processors -/// require 2 processor cycles for a I/O Port Write operation. If the Debug Unit uses -/// a Cortex-M0+ processor with high-speed peripheral I/O only 1 processor cycle might be -/// required. -#define IO_PORT_WRITE_CYCLES 2U ///< I/O Cycles: 2=default, 1=Cortex-M0+ fast I/0. - -/// Indicate that Serial Wire Debug (SWD) communication mode is available at the Debug Access Port. -/// This information is returned by the command \ref DAP_Info as part of Capabilities. -#define DAP_SWD 1 ///< SWD Mode: 1 = available, 0 = not available. - -/// Indicate that JTAG communication mode is available at the Debug Port. -/// This information is returned by the command \ref DAP_Info as part of Capabilities. -#define DAP_JTAG 1 ///< JTAG Mode: 1 = available, 0 = not available. - -/// Configure maximum number of JTAG devices on the scan chain connected to the Debug Access Port. -/// This setting impacts the RAM requirements of the Debug Unit. Valid range is 1 .. 255. -#define DAP_JTAG_DEV_CNT 8U ///< Maximum number of JTAG devices on scan chain. - -/// Default communication mode on the Debug Access Port. -/// Used for the command \ref DAP_Connect when Port Default mode is selected. -#define DAP_DEFAULT_PORT 1U ///< Default JTAG/SWJ Port Mode: 1 = SWD, 2 = JTAG. - -/// Default communication speed on the Debug Access Port for SWD and JTAG mode. -/// Used to initialize the default SWD/JTAG clock frequency. -/// The command \ref DAP_SWJ_Clock can be used to overwrite this default setting. -#define DAP_DEFAULT_SWJ_CLOCK 1000000U ///< Default SWD/JTAG clock frequency in Hz. - -/// Maximum Package Size for Command and Response data. -/// This configuration settings is used to optimize the communication performance with the -/// debugger and depends on the USB peripheral. Typical vales are 64 for Full-speed USB HID or WinUSB, -/// 1024 for High-speed USB HID and 512 for High-speed USB WinUSB. -#define DAP_PACKET_SIZE 512U ///< Specifies Packet Size in bytes. - -/// Maximum Package Buffers for Command and Response data. -/// This configuration settings is used to optimize the communication performance with the -/// debugger and depends on the USB peripheral. For devices with limited RAM or USB buffer the -/// setting can be reduced (valid range is 1 .. 255). -#define DAP_PACKET_COUNT 8U ///< Specifies number of packets buffered. - -/// Indicate that UART Serial Wire Output (SWO) trace is available. -/// This information is returned by the command \ref DAP_Info as part of Capabilities. -#define SWO_UART 1 ///< SWO UART: 1 = available, 0 = not available. - -/// USART Driver instance number for the UART SWO. -#define SWO_UART_DRIVER 0 ///< USART Driver instance number (Driver_USART#). - -/// Maximum SWO UART Baudrate. -#define SWO_UART_MAX_BAUDRATE 10000000U ///< SWO UART Maximum Baudrate in Hz. - -/// Indicate that Manchester Serial Wire Output (SWO) trace is available. -/// This information is returned by the command \ref DAP_Info as part of Capabilities. -#define SWO_MANCHESTER 0 ///< SWO Manchester: 1 = available, 0 = not available. - -/// SWO Trace Buffer Size. -#define SWO_BUFFER_SIZE 4096U ///< SWO Trace Buffer Size in bytes (must be 2^n). - -/// SWO Streaming Trace. -#define SWO_STREAM 0 ///< SWO Streaming Trace: 1 = available, 0 = not available. - -/// Clock frequency of the Test Domain Timer. Timer value is returned with \ref TIMESTAMP_GET. -#define TIMESTAMP_CLOCK 100000000U ///< Timestamp clock in Hz (0 = timestamps not supported). - -/// Indicate that UART Communication Port is available. -/// This information is returned by the command \ref DAP_Info as part of Capabilities. -#define DAP_UART 1 ///< DAP UART: 1 = available, 0 = not available. - -/// USART Driver instance number for the UART Communication Port. -#define DAP_UART_DRIVER 1 ///< USART Driver instance number (Driver_USART#). - -/// UART Receive Buffer Size. -#define DAP_UART_RX_BUFFER_SIZE 1024U ///< Uart Receive Buffer Size in bytes (must be 2^n). - -/// UART Transmit Buffer Size. -#define DAP_UART_TX_BUFFER_SIZE 1024U ///< Uart Transmit Buffer Size in bytes (must be 2^n). - -/// Indicate that UART Communication via USB COM Port is available. -/// This information is returned by the command \ref DAP_Info as part of Capabilities. -#define DAP_UART_USB_COM_PORT 1 ///< USB COM Port: 1 = available, 0 = not available. - -/// Debug Unit is connected to fixed Target Device. -/// The Debug Unit may be part of an evaluation board and always connected to a fixed -/// known device. In this case a Device Vendor, Device Name, Board Vendor and Board Name strings -/// are stored and may be used by the debugger or IDE to configure device parameters. -#define TARGET_FIXED 0 ///< Target: 1 = known, 0 = unknown; - -#define TARGET_DEVICE_VENDOR "Arm" ///< String indicating the Silicon Vendor -#define TARGET_DEVICE_NAME "Cortex-M" ///< String indicating the Target Device -#define TARGET_BOARD_VENDOR "Arm" ///< String indicating the Board Vendor -#define TARGET_BOARD_NAME "Arm board" ///< String indicating the Board Name - -#if TARGET_FIXED != 0 -#include -static const char TargetDeviceVendor [] = TARGET_DEVICE_VENDOR; -static const char TargetDeviceName [] = TARGET_DEVICE_NAME; -static const char TargetBoardVendor [] = TARGET_BOARD_VENDOR; -static const char TargetBoardName [] = TARGET_BOARD_NAME; -#endif - -/** Get Vendor Name string. -\param str Pointer to buffer to store the string (max 60 characters). -\return String length (including terminating NULL character) or 0 (no string). -*/ -__STATIC_INLINE uint8_t DAP_GetVendorString (char *str) { - (void)str; - return (0U); -} - -/** Get Product Name string. -\param str Pointer to buffer to store the string (max 60 characters). -\return String length (including terminating NULL character) or 0 (no string). -*/ -__STATIC_INLINE uint8_t DAP_GetProductString (char *str) { - (void)str; - return (0U); -} - -/** Get Serial Number string. -\param str Pointer to buffer to store the string (max 60 characters). -\return String length (including terminating NULL character) or 0 (no string). -*/ -__STATIC_INLINE uint8_t DAP_GetSerNumString (char *str) { - (void)str; - return (0U); -} - -/** Get Target Device Vendor string. -\param str Pointer to buffer to store the string (max 60 characters). -\return String length (including terminating NULL character) or 0 (no string). -*/ -__STATIC_INLINE uint8_t DAP_GetTargetDeviceVendorString (char *str) { -#if TARGET_FIXED != 0 - uint8_t len; - - strcpy(str, TargetDeviceVendor); - len = (uint8_t)(strlen(TargetDeviceVendor) + 1U); - return (len); -#else - (void)str; - return (0U); -#endif -} - -/** Get Target Device Name string. -\param str Pointer to buffer to store the string (max 60 characters). -\return String length (including terminating NULL character) or 0 (no string). -*/ -__STATIC_INLINE uint8_t DAP_GetTargetDeviceNameString (char *str) { -#if TARGET_FIXED != 0 - uint8_t len; - - strcpy(str, TargetDeviceName); - len = (uint8_t)(strlen(TargetDeviceName) + 1U); - return (len); -#else - (void)str; - return (0U); -#endif -} - -/** Get Target Board Vendor string. -\param str Pointer to buffer to store the string (max 60 characters). -\return String length (including terminating NULL character) or 0 (no string). -*/ -__STATIC_INLINE uint8_t DAP_GetTargetBoardVendorString (char *str) { -#if TARGET_FIXED != 0 - uint8_t len; - - strcpy(str, TargetBoardVendor); - len = (uint8_t)(strlen(TargetBoardVendor) + 1U); - return (len); -#else - (void)str; - return (0U); -#endif -} - -/** Get Target Board Name string. -\param str Pointer to buffer to store the string (max 60 characters). -\return String length (including terminating NULL character) or 0 (no string). -*/ -__STATIC_INLINE uint8_t DAP_GetTargetBoardNameString (char *str) { -#if TARGET_FIXED != 0 - uint8_t len; - - strcpy(str, TargetBoardName); - len = (uint8_t)(strlen(TargetBoardName) + 1U); - return (len); -#else - (void)str; - return (0U); -#endif -} - -/** Get Product Firmware Version string. -\param str Pointer to buffer to store the string (max 60 characters). -\return String length (including terminating NULL character) or 0 (no string). -*/ -__STATIC_INLINE uint8_t DAP_GetProductFirmwareVersionString (char *str) { - (void)str; - return (0U); -} - -///@} - - -//************************************************************************************************** -/** -\defgroup DAP_Config_PortIO_gr CMSIS-DAP Hardware I/O Pin Access -\ingroup DAP_ConfigIO_gr -@{ - -Standard I/O Pins of the CMSIS-DAP Hardware Debug Port support standard JTAG mode -and Serial Wire Debug (SWD) mode. In SWD mode only 2 pins are required to implement the debug -interface of a device. The following I/O Pins are provided: - -JTAG I/O Pin | SWD I/O Pin | CMSIS-DAP Hardware pin mode ----------------------------- | -------------------- | --------------------------------------------- -TCK: Test Clock | SWCLK: Clock | Output Push/Pull -TMS: Test Mode Select | SWDIO: Data I/O | Output Push/Pull; Input (for receiving data) -TDI: Test Data Input | | Output Push/Pull -TDO: Test Data Output | | Input -nTRST: Test Reset (optional) | | Output Open Drain with pull-up resistor -nRESET: Device Reset | nRESET: Device Reset | Output Open Drain with pull-up resistor - - -DAP Hardware I/O Pin Access Functions -------------------------------------- -The various I/O Pins are accessed by functions that implement the Read, Write, Set, or Clear to -these I/O Pins. - -For the SWDIO I/O Pin there are additional functions that are called in SWD I/O mode only. -This functions are provided to achieve faster I/O that is possible with some advanced GPIO -peripherals that can independently write/read a single I/O pin without affecting any other pins -of the same I/O port. The following SWDIO I/O Pin functions are provided: - - \ref PIN_SWDIO_OUT_ENABLE to enable the output mode from the DAP hardware. - - \ref PIN_SWDIO_OUT_DISABLE to enable the input mode to the DAP hardware. - - \ref PIN_SWDIO_IN to read from the SWDIO I/O pin with utmost possible speed. - - \ref PIN_SWDIO_OUT to write to the SWDIO I/O pin with utmost possible speed. -*/ - - -// Configure DAP I/O pins ------------------------------ - -/** Setup JTAG I/O pins: TCK, TMS, TDI, TDO, nTRST, and nRESET. -Configures the DAP Hardware I/O pins for JTAG mode: - - TCK, TMS, TDI, nTRST, nRESET to output mode and set to high level. - - TDO to input mode. -*/ -__STATIC_INLINE void PORT_JTAG_SETUP (void) { - ; -} - -/** Setup SWD I/O pins: SWCLK, SWDIO, and nRESET. -Configures the DAP Hardware I/O pins for Serial Wire Debug (SWD) mode: - - SWCLK, SWDIO, nRESET to output mode and set to default high level. - - TDI, nTRST to HighZ mode (pins are unused in SWD mode). -*/ -__STATIC_INLINE void PORT_SWD_SETUP (void) { - ; -} - -/** Disable JTAG/SWD I/O Pins. -Disables the DAP Hardware I/O pins which configures: - - TCK/SWCLK, TMS/SWDIO, TDI, TDO, nTRST, nRESET to High-Z mode. -*/ -__STATIC_INLINE void PORT_OFF (void) { - ; -} - - -// SWCLK/TCK I/O pin ------------------------------------- - -/** SWCLK/TCK I/O pin: Get Input. -\return Current status of the SWCLK/TCK DAP hardware I/O pin. -*/ -__STATIC_FORCEINLINE uint32_t PIN_SWCLK_TCK_IN (void) { - return (0U); -} - -/** SWCLK/TCK I/O pin: Set Output to High. -Set the SWCLK/TCK DAP hardware I/O pin to high level. -*/ -__STATIC_FORCEINLINE void PIN_SWCLK_TCK_SET (void) { - ; -} - -/** SWCLK/TCK I/O pin: Set Output to Low. -Set the SWCLK/TCK DAP hardware I/O pin to low level. -*/ -__STATIC_FORCEINLINE void PIN_SWCLK_TCK_CLR (void) { - ; -} - - -// SWDIO/TMS Pin I/O -------------------------------------- - -/** SWDIO/TMS I/O pin: Get Input. -\return Current status of the SWDIO/TMS DAP hardware I/O pin. -*/ -__STATIC_FORCEINLINE uint32_t PIN_SWDIO_TMS_IN (void) { - return (0U); -} - -/** SWDIO/TMS I/O pin: Set Output to High. -Set the SWDIO/TMS DAP hardware I/O pin to high level. -*/ -__STATIC_FORCEINLINE void PIN_SWDIO_TMS_SET (void) { - ; -} - -/** SWDIO/TMS I/O pin: Set Output to Low. -Set the SWDIO/TMS DAP hardware I/O pin to low level. -*/ -__STATIC_FORCEINLINE void PIN_SWDIO_TMS_CLR (void) { - ; -} - -/** SWDIO I/O pin: Get Input (used in SWD mode only). -\return Current status of the SWDIO DAP hardware I/O pin. -*/ -__STATIC_FORCEINLINE uint32_t PIN_SWDIO_IN (void) { - return (0U); -} - -/** SWDIO I/O pin: Set Output (used in SWD mode only). -\param bit Output value for the SWDIO DAP hardware I/O pin. -*/ -__STATIC_FORCEINLINE void PIN_SWDIO_OUT (uint32_t bit) { - ; -} - -/** SWDIO I/O pin: Switch to Output mode (used in SWD mode only). -Configure the SWDIO DAP hardware I/O pin to output mode. This function is -called prior \ref PIN_SWDIO_OUT function calls. -*/ -__STATIC_FORCEINLINE void PIN_SWDIO_OUT_ENABLE (void) { - ; -} - -/** SWDIO I/O pin: Switch to Input mode (used in SWD mode only). -Configure the SWDIO DAP hardware I/O pin to input mode. This function is -called prior \ref PIN_SWDIO_IN function calls. -*/ -__STATIC_FORCEINLINE void PIN_SWDIO_OUT_DISABLE (void) { - ; -} - - -// TDI Pin I/O --------------------------------------------- - -/** TDI I/O pin: Get Input. -\return Current status of the TDI DAP hardware I/O pin. -*/ -__STATIC_FORCEINLINE uint32_t PIN_TDI_IN (void) { - return (0U); -} - -/** TDI I/O pin: Set Output. -\param bit Output value for the TDI DAP hardware I/O pin. -*/ -__STATIC_FORCEINLINE void PIN_TDI_OUT (uint32_t bit) { - ; -} - - -// TDO Pin I/O --------------------------------------------- - -/** TDO I/O pin: Get Input. -\return Current status of the TDO DAP hardware I/O pin. -*/ -__STATIC_FORCEINLINE uint32_t PIN_TDO_IN (void) { - return (0U); -} - - -// nTRST Pin I/O ------------------------------------------- - -/** nTRST I/O pin: Get Input. -\return Current status of the nTRST DAP hardware I/O pin. -*/ -__STATIC_FORCEINLINE uint32_t PIN_nTRST_IN (void) { - return (0U); -} - -/** nTRST I/O pin: Set Output. -\param bit JTAG TRST Test Reset pin status: - - 0: issue a JTAG TRST Test Reset. - - 1: release JTAG TRST Test Reset. -*/ -__STATIC_FORCEINLINE void PIN_nTRST_OUT (uint32_t bit) { - ; -} - -// nRESET Pin I/O------------------------------------------ - -/** nRESET I/O pin: Get Input. -\return Current status of the nRESET DAP hardware I/O pin. -*/ -__STATIC_FORCEINLINE uint32_t PIN_nRESET_IN (void) { - return (0U); -} - -/** nRESET I/O pin: Set Output. -\param bit target device hardware reset pin status: - - 0: issue a device hardware reset. - - 1: release device hardware reset. -*/ -__STATIC_FORCEINLINE void PIN_nRESET_OUT (uint32_t bit) { - ; -} - -///@} - - -//************************************************************************************************** -/** -\defgroup DAP_Config_LEDs_gr CMSIS-DAP Hardware Status LEDs -\ingroup DAP_ConfigIO_gr -@{ - -CMSIS-DAP Hardware may provide LEDs that indicate the status of the CMSIS-DAP Debug Unit. - -It is recommended to provide the following LEDs for status indication: - - Connect LED: is active when the DAP hardware is connected to a debugger. - - Running LED: is active when the debugger has put the target device into running state. -*/ - -/** Debug Unit: Set status of Connected LED. -\param bit status of the Connect LED. - - 1: Connect LED ON: debugger is connected to CMSIS-DAP Debug Unit. - - 0: Connect LED OFF: debugger is not connected to CMSIS-DAP Debug Unit. -*/ -__STATIC_INLINE void LED_CONNECTED_OUT (uint32_t bit) {} - -/** Debug Unit: Set status Target Running LED. -\param bit status of the Target Running LED. - - 1: Target Running LED ON: program execution in target started. - - 0: Target Running LED OFF: program execution in target stopped. -*/ -__STATIC_INLINE void LED_RUNNING_OUT (uint32_t bit) {} - -///@} - - -//************************************************************************************************** -/** -\defgroup DAP_Config_Timestamp_gr CMSIS-DAP Timestamp -\ingroup DAP_ConfigIO_gr -@{ -Access function for Test Domain Timer. - -The value of the Test Domain Timer in the Debug Unit is returned by the function \ref TIMESTAMP_GET. By -default, the DWT timer is used. The frequency of this timer is configured with \ref TIMESTAMP_CLOCK. - -*/ - -/** Get timestamp of Test Domain Timer. -\return Current timestamp value. -*/ -__STATIC_INLINE uint32_t TIMESTAMP_GET (void) { - return (DWT->CYCCNT); -} - -///@} - - -//************************************************************************************************** -/** -\defgroup DAP_Config_Initialization_gr CMSIS-DAP Initialization -\ingroup DAP_ConfigIO_gr -@{ - -CMSIS-DAP Hardware I/O and LED Pins are initialized with the function \ref DAP_SETUP. -*/ - -/** Setup of the Debug Unit I/O pins and LEDs (called when Debug Unit is initialized). -This function performs the initialization of the CMSIS-DAP Hardware I/O Pins and the -Status LEDs. In detail the operation of Hardware I/O and LED pins are enabled and set: - - I/O clock system enabled. - - all I/O pins: input buffer enabled, output pins are set to HighZ mode. - - for nTRST, nRESET a weak pull-up (if available) is enabled. - - LED output pins are enabled and LEDs are turned off. -*/ -__STATIC_INLINE void DAP_SETUP (void) { - ; -} - -/** Reset Target Device with custom specific I/O pin or command sequence. -This function allows the optional implementation of a device specific reset sequence. -It is called when the command \ref DAP_ResetTarget and is for example required -when a device needs a time-critical unlock sequence that enables the debug port. -\return 0 = no device specific reset sequence is implemented.\n - 1 = a device specific reset sequence is implemented. -*/ -__STATIC_INLINE uint8_t RESET_TARGET (void) { - return (0U); // change to '1' when a device reset sequence is implemented -} - -///@} - - -#endif /* __DAP_CONFIG_H__ */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Include/DAP.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Include/DAP.h deleted file mode 100644 index 55cf686..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Include/DAP.h +++ /dev/null @@ -1,367 +0,0 @@ -/* - * Copyright (c) 2013-2022 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 26. April 2022 - * $Revision: V2.1.1 - * - * Project: CMSIS-DAP Include - * Title: DAP.h Definitions - * - *---------------------------------------------------------------------------*/ - -#ifndef __DAP_H__ -#define __DAP_H__ - - -// DAP Firmware Version -#ifdef DAP_FW_V1 -#define DAP_FW_VER "1.3.0" -#else -#define DAP_FW_VER "2.1.1" -#endif - -// DAP Command IDs -#define ID_DAP_Info 0x00U -#define ID_DAP_HostStatus 0x01U -#define ID_DAP_Connect 0x02U -#define ID_DAP_Disconnect 0x03U -#define ID_DAP_TransferConfigure 0x04U -#define ID_DAP_Transfer 0x05U -#define ID_DAP_TransferBlock 0x06U -#define ID_DAP_TransferAbort 0x07U -#define ID_DAP_WriteABORT 0x08U -#define ID_DAP_Delay 0x09U -#define ID_DAP_ResetTarget 0x0AU -#define ID_DAP_SWJ_Pins 0x10U -#define ID_DAP_SWJ_Clock 0x11U -#define ID_DAP_SWJ_Sequence 0x12U -#define ID_DAP_SWD_Configure 0x13U -#define ID_DAP_SWD_Sequence 0x1DU -#define ID_DAP_JTAG_Sequence 0x14U -#define ID_DAP_JTAG_Configure 0x15U -#define ID_DAP_JTAG_IDCODE 0x16U -#define ID_DAP_SWO_Transport 0x17U -#define ID_DAP_SWO_Mode 0x18U -#define ID_DAP_SWO_Baudrate 0x19U -#define ID_DAP_SWO_Control 0x1AU -#define ID_DAP_SWO_Status 0x1BU -#define ID_DAP_SWO_ExtendedStatus 0x1EU -#define ID_DAP_SWO_Data 0x1CU -#define ID_DAP_UART_Transport 0x1FU -#define ID_DAP_UART_Configure 0x20U -#define ID_DAP_UART_Control 0x22U -#define ID_DAP_UART_Status 0x23U -#define ID_DAP_UART_Transfer 0x21U - -#define ID_DAP_QueueCommands 0x7EU -#define ID_DAP_ExecuteCommands 0x7FU - -// DAP Vendor Command IDs -#define ID_DAP_Vendor0 0x80U -#define ID_DAP_Vendor1 0x81U -#define ID_DAP_Vendor2 0x82U -#define ID_DAP_Vendor3 0x83U -#define ID_DAP_Vendor4 0x84U -#define ID_DAP_Vendor5 0x85U -#define ID_DAP_Vendor6 0x86U -#define ID_DAP_Vendor7 0x87U -#define ID_DAP_Vendor8 0x88U -#define ID_DAP_Vendor9 0x89U -#define ID_DAP_Vendor10 0x8AU -#define ID_DAP_Vendor11 0x8BU -#define ID_DAP_Vendor12 0x8CU -#define ID_DAP_Vendor13 0x8DU -#define ID_DAP_Vendor14 0x8EU -#define ID_DAP_Vendor15 0x8FU -#define ID_DAP_Vendor16 0x90U -#define ID_DAP_Vendor17 0x91U -#define ID_DAP_Vendor18 0x92U -#define ID_DAP_Vendor19 0x93U -#define ID_DAP_Vendor20 0x94U -#define ID_DAP_Vendor21 0x95U -#define ID_DAP_Vendor22 0x96U -#define ID_DAP_Vendor23 0x97U -#define ID_DAP_Vendor24 0x98U -#define ID_DAP_Vendor25 0x99U -#define ID_DAP_Vendor26 0x9AU -#define ID_DAP_Vendor27 0x9BU -#define ID_DAP_Vendor28 0x9CU -#define ID_DAP_Vendor29 0x9DU -#define ID_DAP_Vendor30 0x9EU -#define ID_DAP_Vendor31 0x9FU - -#define ID_DAP_Invalid 0xFFU - -// DAP Status Code -#define DAP_OK 0U -#define DAP_ERROR 0xFFU - -// DAP ID -#define DAP_ID_VENDOR 1U -#define DAP_ID_PRODUCT 2U -#define DAP_ID_SER_NUM 3U -#define DAP_ID_DAP_FW_VER 4U -#define DAP_ID_DEVICE_VENDOR 5U -#define DAP_ID_DEVICE_NAME 6U -#define DAP_ID_BOARD_VENDOR 7U -#define DAP_ID_BOARD_NAME 8U -#define DAP_ID_PRODUCT_FW_VER 9U -#define DAP_ID_CAPABILITIES 0xF0U -#define DAP_ID_TIMESTAMP_CLOCK 0xF1U -#define DAP_ID_UART_RX_BUFFER_SIZE 0xFBU -#define DAP_ID_UART_TX_BUFFER_SIZE 0xFCU -#define DAP_ID_SWO_BUFFER_SIZE 0xFDU -#define DAP_ID_PACKET_COUNT 0xFEU -#define DAP_ID_PACKET_SIZE 0xFFU - -// DAP Host Status -#define DAP_DEBUGGER_CONNECTED 0U -#define DAP_TARGET_RUNNING 1U - -// DAP Port -#define DAP_PORT_AUTODETECT 0U // Autodetect Port -#define DAP_PORT_DISABLED 0U // Port Disabled (I/O pins in High-Z) -#define DAP_PORT_SWD 1U // SWD Port (SWCLK, SWDIO) + nRESET -#define DAP_PORT_JTAG 2U // JTAG Port (TCK, TMS, TDI, TDO, nTRST) + nRESET - -// DAP SWJ Pins -#define DAP_SWJ_SWCLK_TCK 0 // SWCLK/TCK -#define DAP_SWJ_SWDIO_TMS 1 // SWDIO/TMS -#define DAP_SWJ_TDI 2 // TDI -#define DAP_SWJ_TDO 3 // TDO -#define DAP_SWJ_nTRST 5 // nTRST -#define DAP_SWJ_nRESET 7 // nRESET - -// DAP Transfer Request -#define DAP_TRANSFER_APnDP (1U<<0) -#define DAP_TRANSFER_RnW (1U<<1) -#define DAP_TRANSFER_A2 (1U<<2) -#define DAP_TRANSFER_A3 (1U<<3) -#define DAP_TRANSFER_MATCH_VALUE (1U<<4) -#define DAP_TRANSFER_MATCH_MASK (1U<<5) -#define DAP_TRANSFER_TIMESTAMP (1U<<7) - -// DAP Transfer Response -#define DAP_TRANSFER_OK (1U<<0) -#define DAP_TRANSFER_WAIT (1U<<1) -#define DAP_TRANSFER_FAULT (1U<<2) -#define DAP_TRANSFER_ERROR (1U<<3) -#define DAP_TRANSFER_MISMATCH (1U<<4) - -// DAP SWO Trace Mode -#define DAP_SWO_OFF 0U -#define DAP_SWO_UART 1U -#define DAP_SWO_MANCHESTER 2U - -// DAP SWO Trace Status -#define DAP_SWO_CAPTURE_ACTIVE (1U<<0) -#define DAP_SWO_CAPTURE_PAUSED (1U<<1) -#define DAP_SWO_STREAM_ERROR (1U<<6) -#define DAP_SWO_BUFFER_OVERRUN (1U<<7) - -// DAP UART Transport -#define DAP_UART_TRANSPORT_NONE 0U -#define DAP_UART_TRANSPORT_USB_COM_PORT 1U -#define DAP_UART_TRANSPORT_DAP_COMMAND 2U - -// DAP UART Control -#define DAP_UART_CONTROL_RX_ENABLE (1U<<0) -#define DAP_UART_CONTROL_RX_DISABLE (1U<<1) -#define DAP_UART_CONTROL_RX_BUF_FLUSH (1U<<2) -#define DAP_UART_CONTROL_TX_ENABLE (1U<<4) -#define DAP_UART_CONTROL_TX_DISABLE (1U<<5) -#define DAP_UART_CONTROL_TX_BUF_FLUSH (1U<<6) - -// DAP UART Status -#define DAP_UART_STATUS_RX_ENABLED (1U<<0) -#define DAP_UART_STATUS_RX_DATA_LOST (1U<<1) -#define DAP_UART_STATUS_FRAMING_ERROR (1U<<2) -#define DAP_UART_STATUS_PARITY_ERROR (1U<<3) -#define DAP_UART_STATUS_TX_ENABLED (1U<<4) - -// DAP UART Configure Error -#define DAP_UART_CFG_ERROR_DATA_BITS (1U<<0) -#define DAP_UART_CFG_ERROR_PARITY (1U<<1) -#define DAP_UART_CFG_ERROR_STOP_BITS (1U<<2) - -// Debug Port Register Addresses -#define DP_IDCODE 0x00U // IDCODE Register (SW Read only) -#define DP_ABORT 0x00U // Abort Register (SW Write only) -#define DP_CTRL_STAT 0x04U // Control & Status -#define DP_WCR 0x04U // Wire Control Register (SW Only) -#define DP_SELECT 0x08U // Select Register (JTAG R/W & SW W) -#define DP_RESEND 0x08U // Resend (SW Read Only) -#define DP_RDBUFF 0x0CU // Read Buffer (Read Only) - -// JTAG IR Codes -#define JTAG_ABORT 0x08U -#define JTAG_DPACC 0x0AU -#define JTAG_APACC 0x0BU -#define JTAG_IDCODE 0x0EU -#define JTAG_BYPASS 0x0FU - -// JTAG Sequence Info -#define JTAG_SEQUENCE_TCK 0x3FU // TCK count -#define JTAG_SEQUENCE_TMS 0x40U // TMS value -#define JTAG_SEQUENCE_TDO 0x80U // TDO capture - -// SWD Sequence Info -#define SWD_SEQUENCE_CLK 0x3FU // SWCLK count -#define SWD_SEQUENCE_DIN 0x80U // SWDIO capture - - -#include -#include -#include "cmsis_compiler.h" - -// DAP Data structure -typedef struct { - uint8_t debug_port; // Debug Port - uint8_t fast_clock; // Fast Clock Flag - uint8_t padding[2]; - uint32_t clock_delay; // Clock Delay - uint32_t timestamp; // Last captured Timestamp - struct { // Transfer Configuration - uint8_t idle_cycles; // Idle cycles after transfer - uint8_t padding[3]; - uint16_t retry_count; // Number of retries after WAIT response - uint16_t match_retry; // Number of retries if read value does not match - uint32_t match_mask; // Match Mask - } transfer; -#if (DAP_SWD != 0) - struct { // SWD Configuration - uint8_t turnaround; // Turnaround period - uint8_t data_phase; // Always generate Data Phase - } swd_conf; -#endif -#if (DAP_JTAG != 0) - struct { // JTAG Device Chain - uint8_t count; // Number of devices - uint8_t index; // Device index (device at TDO has index 0) -#if (DAP_JTAG_DEV_CNT != 0) - uint8_t ir_length[DAP_JTAG_DEV_CNT]; // IR Length in bits - uint16_t ir_before[DAP_JTAG_DEV_CNT]; // Bits before IR - uint16_t ir_after [DAP_JTAG_DEV_CNT]; // Bits after IR -#endif - } jtag_dev; -#endif -} DAP_Data_t; - -extern DAP_Data_t DAP_Data; // DAP Data -extern volatile uint8_t DAP_TransferAbort; // Transfer Abort Flag - - -#ifdef __cplusplus -extern "C" -{ -#endif - -// Functions -extern void SWJ_Sequence (uint32_t count, const uint8_t *data); -extern void SWD_Sequence (uint32_t info, const uint8_t *swdo, uint8_t *swdi); -extern void JTAG_Sequence (uint32_t info, const uint8_t *tdi, uint8_t *tdo); -extern void JTAG_IR (uint32_t ir); -extern uint32_t JTAG_ReadIDCode (void); -extern void JTAG_WriteAbort (uint32_t data); -extern uint8_t JTAG_Transfer (uint32_t request, uint32_t *data); -extern uint8_t SWD_Transfer (uint32_t request, uint32_t *data); - -extern void Delayms (uint32_t delay); - -extern uint32_t SWO_Transport (const uint8_t *request, uint8_t *response); -extern uint32_t SWO_Mode (const uint8_t *request, uint8_t *response); -extern uint32_t SWO_Baudrate (const uint8_t *request, uint8_t *response); -extern uint32_t SWO_Control (const uint8_t *request, uint8_t *response); -extern uint32_t SWO_Status (uint8_t *response); -extern uint32_t SWO_ExtendedStatus (const uint8_t *request, uint8_t *response); -extern uint32_t SWO_Data (const uint8_t *request, uint8_t *response); - -extern void SWO_QueueTransfer (uint8_t *buf, uint32_t num); -extern void SWO_AbortTransfer (void); -extern void SWO_TransferComplete (void); - -extern uint32_t SWO_Mode_UART (uint32_t enable); -extern uint32_t SWO_Baudrate_UART (uint32_t baudrate); -extern uint32_t SWO_Control_UART (uint32_t active); -extern void SWO_Capture_UART (uint8_t *buf, uint32_t num); -extern uint32_t SWO_GetCount_UART (void); - -extern uint32_t SWO_Mode_Manchester (uint32_t enable); -extern uint32_t SWO_Baudrate_Manchester (uint32_t baudrate); -extern uint32_t SWO_Control_Manchester (uint32_t active); -extern void SWO_Capture_Manchester (uint8_t *buf, uint32_t num); -extern uint32_t SWO_GetCount_Manchester (void); - -extern uint32_t UART_Transport (const uint8_t *request, uint8_t *response); -extern uint32_t UART_Configure (const uint8_t *request, uint8_t *response); -extern uint32_t UART_Control (const uint8_t *request, uint8_t *response); -extern uint32_t UART_Status (uint8_t *response); -extern uint32_t UART_Transfer (const uint8_t *request, uint8_t *response); - -extern uint8_t USB_COM_PORT_Activate (uint32_t cmd); - -extern uint32_t DAP_ProcessVendorCommand (const uint8_t *request, uint8_t *response); -extern uint32_t DAP_ProcessCommand (const uint8_t *request, uint8_t *response); -extern uint32_t DAP_ExecuteCommand (const uint8_t *request, uint8_t *response); - -extern void DAP_Setup (void); - -// Configurable delay for clock generation -#ifndef DELAY_SLOW_CYCLES -#define DELAY_SLOW_CYCLES 3U // Number of cycles for one iteration -#endif -#if defined(__CC_ARM) -__STATIC_FORCEINLINE void PIN_DELAY_SLOW (uint32_t delay) { - uint32_t count = delay; - while (--count); -} -#else -__STATIC_FORCEINLINE void PIN_DELAY_SLOW (uint32_t delay) { - __ASM volatile ( - ".syntax unified\n" - "0:\n\t" - "subs %0,%0,#1\n\t" - "bne 0b\n" - : "+l" (delay) : : "cc" - ); -} -#endif - -// Fixed delay for fast clock generation -#ifndef DELAY_FAST_CYCLES -#define DELAY_FAST_CYCLES 0U // Number of cycles: 0..3 -#endif -__STATIC_FORCEINLINE void PIN_DELAY_FAST (void) { -#if (DELAY_FAST_CYCLES >= 1U) - __NOP(); -#endif -#if (DELAY_FAST_CYCLES >= 2U) - __NOP(); -#endif -#if (DELAY_FAST_CYCLES >= 3U) - __NOP(); -#endif -} - -#ifdef __cplusplus -} -#endif - - -#endif /* __DAP_H__ */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/DAP.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/DAP.c deleted file mode 100644 index 606917e..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/DAP.c +++ /dev/null @@ -1,1812 +0,0 @@ -/* - * Copyright (c) 2013-2022 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 26. April 2022 - * $Revision: V2.1.1 - * - * Project: CMSIS-DAP Source - * Title: DAP.c CMSIS-DAP Commands - * - *---------------------------------------------------------------------------*/ - -#include -#include "DAP_config.h" -#include "DAP.h" - - -#if (DAP_PACKET_SIZE < 64U) -#error "Minimum Packet Size is 64!" -#endif -#if (DAP_PACKET_SIZE > 32768U) -#error "Maximum Packet Size is 32768!" -#endif -#if (DAP_PACKET_COUNT < 1U) -#error "Minimum Packet Count is 1!" -#endif -#if (DAP_PACKET_COUNT > 255U) -#error "Maximum Packet Count is 255!" -#endif - - -// Clock Macros -#define MAX_SWJ_CLOCK(delay_cycles) \ - ((CPU_CLOCK/2U) / (IO_PORT_WRITE_CYCLES + delay_cycles)) - - - DAP_Data_t DAP_Data; // DAP Data -volatile uint8_t DAP_TransferAbort; // Transfer Abort Flag - - -static const char DAP_FW_Ver [] = DAP_FW_VER; - - -// Common clock delay calculation routine -// clock: requested SWJ frequency in Hertz -static void Set_Clock_Delay(uint32_t clock) { - uint32_t delay; - - if (clock >= MAX_SWJ_CLOCK(DELAY_FAST_CYCLES)) { - DAP_Data.fast_clock = 1U; - DAP_Data.clock_delay = 1U; - } else { - DAP_Data.fast_clock = 0U; - - delay = ((CPU_CLOCK/2U) + (clock - 1U)) / clock; - if (delay > IO_PORT_WRITE_CYCLES) { - delay -= IO_PORT_WRITE_CYCLES; - delay = (delay + (DELAY_SLOW_CYCLES - 1U)) / DELAY_SLOW_CYCLES; - } else { - delay = 1U; - } - - DAP_Data.clock_delay = delay; - } -} - - -// Get DAP Information -// id: info identifier -// info: pointer to info data -// return: number of bytes in info data -static uint8_t DAP_Info(uint8_t id, uint8_t *info) { - uint8_t length = 0U; - - switch (id) { - case DAP_ID_VENDOR: - length = DAP_GetVendorString((char *)info); - break; - case DAP_ID_PRODUCT: - length = DAP_GetProductString((char *)info); - break; - case DAP_ID_SER_NUM: - length = DAP_GetSerNumString((char *)info); - break; - case DAP_ID_DAP_FW_VER: - length = (uint8_t)sizeof(DAP_FW_Ver); - memcpy(info, DAP_FW_Ver, length); - break; - case DAP_ID_DEVICE_VENDOR: - length = DAP_GetTargetDeviceVendorString((char *)info); - break; - case DAP_ID_DEVICE_NAME: - length = DAP_GetTargetDeviceNameString((char *)info); - break; - case DAP_ID_BOARD_VENDOR: - length = DAP_GetTargetBoardVendorString((char *)info); - break; - case DAP_ID_BOARD_NAME: - length = DAP_GetTargetBoardNameString((char *)info); - break; - case DAP_ID_PRODUCT_FW_VER: - length = DAP_GetProductFirmwareVersionString((char *)info); - break; - case DAP_ID_CAPABILITIES: - info[0] = ((DAP_SWD != 0) ? (1U << 0) : 0U) | - ((DAP_JTAG != 0) ? (1U << 1) : 0U) | - ((SWO_UART != 0) ? (1U << 2) : 0U) | - ((SWO_MANCHESTER != 0) ? (1U << 3) : 0U) | - /* Atomic Commands */ (1U << 4) | - ((TIMESTAMP_CLOCK != 0U) ? (1U << 5) : 0U) | - ((SWO_STREAM != 0U) ? (1U << 6) : 0U) | - ((DAP_UART != 0U) ? (1U << 7) : 0U); - - info[1] = ((DAP_UART_USB_COM_PORT != 0) ? (1U << 0) : 0U); - length = 2U; - break; - case DAP_ID_TIMESTAMP_CLOCK: -#if (TIMESTAMP_CLOCK != 0U) - info[0] = (uint8_t)(TIMESTAMP_CLOCK >> 0); - info[1] = (uint8_t)(TIMESTAMP_CLOCK >> 8); - info[2] = (uint8_t)(TIMESTAMP_CLOCK >> 16); - info[3] = (uint8_t)(TIMESTAMP_CLOCK >> 24); - length = 4U; -#endif - break; - case DAP_ID_UART_RX_BUFFER_SIZE: -#if (DAP_UART != 0) - info[0] = (uint8_t)(DAP_UART_RX_BUFFER_SIZE >> 0); - info[1] = (uint8_t)(DAP_UART_RX_BUFFER_SIZE >> 8); - info[2] = (uint8_t)(DAP_UART_RX_BUFFER_SIZE >> 16); - info[3] = (uint8_t)(DAP_UART_RX_BUFFER_SIZE >> 24); - length = 4U; -#endif - break; - case DAP_ID_UART_TX_BUFFER_SIZE: -#if (DAP_UART != 0) - info[0] = (uint8_t)(DAP_UART_TX_BUFFER_SIZE >> 0); - info[1] = (uint8_t)(DAP_UART_TX_BUFFER_SIZE >> 8); - info[2] = (uint8_t)(DAP_UART_TX_BUFFER_SIZE >> 16); - info[3] = (uint8_t)(DAP_UART_TX_BUFFER_SIZE >> 24); - length = 4U; -#endif - break; - case DAP_ID_SWO_BUFFER_SIZE: -#if ((SWO_UART != 0) || (SWO_MANCHESTER != 0)) - info[0] = (uint8_t)(SWO_BUFFER_SIZE >> 0); - info[1] = (uint8_t)(SWO_BUFFER_SIZE >> 8); - info[2] = (uint8_t)(SWO_BUFFER_SIZE >> 16); - info[3] = (uint8_t)(SWO_BUFFER_SIZE >> 24); - length = 4U; -#endif - break; - case DAP_ID_PACKET_SIZE: - info[0] = (uint8_t)(DAP_PACKET_SIZE >> 0); - info[1] = (uint8_t)(DAP_PACKET_SIZE >> 8); - length = 2U; - break; - case DAP_ID_PACKET_COUNT: - info[0] = DAP_PACKET_COUNT; - length = 1U; - break; - default: - break; - } - - return (length); -} - - -// Delay for specified time -// delay: delay time in ms -void Delayms(uint32_t delay) { - delay *= ((CPU_CLOCK/1000U) + (DELAY_SLOW_CYCLES-1U)) / DELAY_SLOW_CYCLES; - PIN_DELAY_SLOW(delay); -} - - -// Process Delay command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_Delay(const uint8_t *request, uint8_t *response) { - uint32_t delay; - - delay = (uint32_t)(*(request+0)) | - (uint32_t)(*(request+1) << 8); - delay *= ((CPU_CLOCK/1000000U) + (DELAY_SLOW_CYCLES-1U)) / DELAY_SLOW_CYCLES; - - PIN_DELAY_SLOW(delay); - - *response = DAP_OK; - return ((2U << 16) | 1U); -} - - -// Process Host Status command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_HostStatus(const uint8_t *request, uint8_t *response) { - - switch (*request) { - case DAP_DEBUGGER_CONNECTED: - LED_CONNECTED_OUT((*(request+1) & 1U)); - break; - case DAP_TARGET_RUNNING: - LED_RUNNING_OUT((*(request+1) & 1U)); - break; - default: - *response = DAP_ERROR; - return ((2U << 16) | 1U); - } - - *response = DAP_OK; - return ((2U << 16) | 1U); -} - - -// Process Connect command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_Connect(const uint8_t *request, uint8_t *response) { - uint32_t port; - - if (*request == DAP_PORT_AUTODETECT) { - port = DAP_DEFAULT_PORT; - } else { - port = *request; - } - - switch (port) { -#if (DAP_SWD != 0) - case DAP_PORT_SWD: - DAP_Data.debug_port = DAP_PORT_SWD; - PORT_SWD_SETUP(); - break; -#endif -#if (DAP_JTAG != 0) - case DAP_PORT_JTAG: - DAP_Data.debug_port = DAP_PORT_JTAG; - PORT_JTAG_SETUP(); - break; -#endif - default: - port = DAP_PORT_DISABLED; - break; - } - - *response = (uint8_t)port; - return ((1U << 16) | 1U); -} - - -// Process Disconnect command and prepare response -// response: pointer to response data -// return: number of bytes in response -static uint32_t DAP_Disconnect(uint8_t *response) { - - DAP_Data.debug_port = DAP_PORT_DISABLED; - PORT_OFF(); - - *response = DAP_OK; - return (1U); -} - - -// Process Reset Target command and prepare response -// response: pointer to response data -// return: number of bytes in response -static uint32_t DAP_ResetTarget(uint8_t *response) { - - *(response+1) = RESET_TARGET(); - *(response+0) = DAP_OK; - return (2U); -} - - -// Process SWJ Pins command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_SWJ_Pins(const uint8_t *request, uint8_t *response) { -#if ((DAP_SWD != 0) || (DAP_JTAG != 0)) - uint32_t value; - uint32_t select; - uint32_t wait; - uint32_t timestamp; - - value = (uint32_t) *(request+0); - select = (uint32_t) *(request+1); - wait = (uint32_t)(*(request+2) << 0) | - (uint32_t)(*(request+3) << 8) | - (uint32_t)(*(request+4) << 16) | - (uint32_t)(*(request+5) << 24); - - if ((select & (1U << DAP_SWJ_SWCLK_TCK)) != 0U) { - if ((value & (1U << DAP_SWJ_SWCLK_TCK)) != 0U) { - PIN_SWCLK_TCK_SET(); - } else { - PIN_SWCLK_TCK_CLR(); - } - } - if ((select & (1U << DAP_SWJ_SWDIO_TMS)) != 0U) { - if ((value & (1U << DAP_SWJ_SWDIO_TMS)) != 0U) { - PIN_SWDIO_TMS_SET(); - } else { - PIN_SWDIO_TMS_CLR(); - } - } - if ((select & (1U << DAP_SWJ_TDI)) != 0U) { - PIN_TDI_OUT(value >> DAP_SWJ_TDI); - } - if ((select & (1U << DAP_SWJ_nTRST)) != 0U) { - PIN_nTRST_OUT(value >> DAP_SWJ_nTRST); - } - if ((select & (1U << DAP_SWJ_nRESET)) != 0U){ - PIN_nRESET_OUT(value >> DAP_SWJ_nRESET); - } - - if (wait != 0U) { -#if (TIMESTAMP_CLOCK != 0U) - if (wait > 3000000U) { - wait = 3000000U; - } -#if (TIMESTAMP_CLOCK >= 1000000U) - wait *= TIMESTAMP_CLOCK / 1000000U; -#else - wait /= 1000000U / TIMESTAMP_CLOCK; -#endif -#else - wait = 1U; -#endif - timestamp = TIMESTAMP_GET(); - do { - if ((select & (1U << DAP_SWJ_SWCLK_TCK)) != 0U) { - if ((value >> DAP_SWJ_SWCLK_TCK) ^ PIN_SWCLK_TCK_IN()) { - continue; - } - } - if ((select & (1U << DAP_SWJ_SWDIO_TMS)) != 0U) { - if ((value >> DAP_SWJ_SWDIO_TMS) ^ PIN_SWDIO_TMS_IN()) { - continue; - } - } - if ((select & (1U << DAP_SWJ_TDI)) != 0U) { - if ((value >> DAP_SWJ_TDI) ^ PIN_TDI_IN()) { - continue; - } - } - if ((select & (1U << DAP_SWJ_nTRST)) != 0U) { - if ((value >> DAP_SWJ_nTRST) ^ PIN_nTRST_IN()) { - continue; - } - } - if ((select & (1U << DAP_SWJ_nRESET)) != 0U) { - if ((value >> DAP_SWJ_nRESET) ^ PIN_nRESET_IN()) { - continue; - } - } - break; - } while ((TIMESTAMP_GET() - timestamp) < wait); - } - - value = (PIN_SWCLK_TCK_IN() << DAP_SWJ_SWCLK_TCK) | - (PIN_SWDIO_TMS_IN() << DAP_SWJ_SWDIO_TMS) | - (PIN_TDI_IN() << DAP_SWJ_TDI) | - (PIN_TDO_IN() << DAP_SWJ_TDO) | - (PIN_nTRST_IN() << DAP_SWJ_nTRST) | - (PIN_nRESET_IN() << DAP_SWJ_nRESET); - - *response = (uint8_t)value; -#else - *response = 0U; -#endif - - return ((6U << 16) | 1U); -} - - -// Process SWJ Clock command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_SWJ_Clock(const uint8_t *request, uint8_t *response) { -#if ((DAP_SWD != 0) || (DAP_JTAG != 0)) - uint32_t clock; - uint32_t delay; - - clock = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8) | - (uint32_t)(*(request+2) << 16) | - (uint32_t)(*(request+3) << 24); - - if (clock == 0U) { - *response = DAP_ERROR; - return ((4U << 16) | 1U); - } - - Set_Clock_Delay(clock); - - *response = DAP_OK; -#else - *response = DAP_ERROR; -#endif - - return ((4U << 16) | 1U); -} - - -// Process SWJ Sequence command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_SWJ_Sequence(const uint8_t *request, uint8_t *response) { - uint32_t count; - - count = *request++; - if (count == 0U) { - count = 256U; - } - -#if ((DAP_SWD != 0) || (DAP_JTAG != 0)) - SWJ_Sequence(count, request); - *response = DAP_OK; -#else - *response = DAP_ERROR; -#endif - - count = (count + 7U) >> 3; - - return (((count + 1U) << 16) | 1U); -} - - -// Process SWD Configure command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_SWD_Configure(const uint8_t *request, uint8_t *response) { -#if (DAP_SWD != 0) - uint8_t value; - - value = *request; - DAP_Data.swd_conf.turnaround = (value & 0x03U) + 1U; - DAP_Data.swd_conf.data_phase = (value & 0x04U) ? 1U : 0U; - - *response = DAP_OK; -#else - *response = DAP_ERROR; -#endif - - return ((1U << 16) | 1U); -} - - -// Process SWD Sequence command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_SWD_Sequence(const uint8_t *request, uint8_t *response) { - uint32_t sequence_info; - uint32_t sequence_count; - uint32_t request_count; - uint32_t response_count; - uint32_t count; - -#if (DAP_SWD != 0) - *response++ = DAP_OK; -#else - *response++ = DAP_ERROR; -#endif - request_count = 1U; - response_count = 1U; - - sequence_count = *request++; - while (sequence_count--) { - sequence_info = *request++; - count = sequence_info & SWD_SEQUENCE_CLK; - if (count == 0U) { - count = 64U; - } - count = (count + 7U) / 8U; -#if (DAP_SWD != 0) - if ((sequence_info & SWD_SEQUENCE_DIN) != 0U) { - PIN_SWDIO_OUT_DISABLE(); - } else { - PIN_SWDIO_OUT_ENABLE(); - } - SWD_Sequence(sequence_info, request, response); - if (sequence_count == 0U) { - PIN_SWDIO_OUT_ENABLE(); - } -#endif - if ((sequence_info & SWD_SEQUENCE_DIN) != 0U) { - request_count++; -#if (DAP_SWD != 0) - response += count; - response_count += count; -#endif - } else { - request += count; - request_count += count + 1U; - } - } - - return ((request_count << 16) | response_count); -} - - -// Process JTAG Sequence command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_JTAG_Sequence(const uint8_t *request, uint8_t *response) { - uint32_t sequence_info; - uint32_t sequence_count; - uint32_t request_count; - uint32_t response_count; - uint32_t count; - -#if (DAP_JTAG != 0) - *response++ = DAP_OK; -#else - *response++ = DAP_ERROR; -#endif - request_count = 1U; - response_count = 1U; - - sequence_count = *request++; - while (sequence_count--) { - sequence_info = *request++; - count = sequence_info & JTAG_SEQUENCE_TCK; - if (count == 0U) { - count = 64U; - } - count = (count + 7U) / 8U; -#if (DAP_JTAG != 0) - JTAG_Sequence(sequence_info, request, response); -#endif - request += count; - request_count += count + 1U; -#if (DAP_JTAG != 0) - if ((sequence_info & JTAG_SEQUENCE_TDO) != 0U) { - response += count; - response_count += count; - } -#endif - } - - return ((request_count << 16) | response_count); -} - - -// Process JTAG Configure command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_JTAG_Configure(const uint8_t *request, uint8_t *response) { - uint32_t count; -#if (DAP_JTAG != 0) - uint32_t length; - uint32_t bits; - uint32_t n; - - count = *request++; - DAP_Data.jtag_dev.count = (uint8_t)count; - - bits = 0U; - for (n = 0U; n < count; n++) { - length = *request++; - DAP_Data.jtag_dev.ir_length[n] = (uint8_t)length; - DAP_Data.jtag_dev.ir_before[n] = (uint16_t)bits; - bits += length; - } - for (n = 0U; n < count; n++) { - bits -= DAP_Data.jtag_dev.ir_length[n]; - DAP_Data.jtag_dev.ir_after[n] = (uint16_t)bits; - } - - *response = DAP_OK; -#else - count = *request; - *response = DAP_ERROR; -#endif - - return (((count + 1U) << 16) | 1U); -} - - -// Process JTAG IDCODE command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_JTAG_IDCode(const uint8_t *request, uint8_t *response) { -#if (DAP_JTAG != 0) - uint32_t data; - - if (DAP_Data.debug_port != DAP_PORT_JTAG) { - goto id_error; - } - - // Device index (JTAP TAP) - DAP_Data.jtag_dev.index = *request; - if (DAP_Data.jtag_dev.index >= DAP_Data.jtag_dev.count) { - goto id_error; - } - - // Select JTAG chain - JTAG_IR(JTAG_IDCODE); - - // Read IDCODE register - data = JTAG_ReadIDCode(); - - // Store Data - *(response+0) = DAP_OK; - *(response+1) = (uint8_t)(data >> 0); - *(response+2) = (uint8_t)(data >> 8); - *(response+3) = (uint8_t)(data >> 16); - *(response+4) = (uint8_t)(data >> 24); - - return ((1U << 16) | 5U); - -id_error: -#endif - *response = DAP_ERROR; - return ((1U << 16) | 1U); -} - - -// Process Transfer Configure command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_TransferConfigure(const uint8_t *request, uint8_t *response) { - - DAP_Data.transfer.idle_cycles = *(request+0); - DAP_Data.transfer.retry_count = (uint16_t) *(request+1) | - (uint16_t)(*(request+2) << 8); - DAP_Data.transfer.match_retry = (uint16_t) *(request+3) | - (uint16_t)(*(request+4) << 8); - - *response = DAP_OK; - return ((5U << 16) | 1U); -} - - -// Process SWD Transfer command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -#if (DAP_SWD != 0) -static uint32_t DAP_SWD_Transfer(const uint8_t *request, uint8_t *response) { - const - uint8_t *request_head; - uint32_t request_count; - uint32_t request_value; - uint8_t *response_head; - uint32_t response_count; - uint32_t response_value; - uint32_t post_read; - uint32_t check_write; - uint32_t match_value; - uint32_t match_retry; - uint32_t retry; - uint32_t data; -#if (TIMESTAMP_CLOCK != 0U) - uint32_t timestamp; -#endif - - request_head = request; - - response_count = 0U; - response_value = 0U; - response_head = response; - response += 2; - - DAP_TransferAbort = 0U; - - post_read = 0U; - check_write = 0U; - - request++; // Ignore DAP index - - request_count = *request++; - - for (; request_count != 0U; request_count--) { - request_value = *request++; - if ((request_value & DAP_TRANSFER_RnW) != 0U) { - // Read register - if (post_read) { - // Read was posted before - retry = DAP_Data.transfer.retry_count; - if ((request_value & (DAP_TRANSFER_APnDP | DAP_TRANSFER_MATCH_VALUE)) == DAP_TRANSFER_APnDP) { - // Read previous AP data and post next AP read - do { - response_value = SWD_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - } else { - // Read previous AP data - do { - response_value = SWD_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - post_read = 0U; - } - if (response_value != DAP_TRANSFER_OK) { - break; - } - // Store previous AP data - *response++ = (uint8_t) data; - *response++ = (uint8_t)(data >> 8); - *response++ = (uint8_t)(data >> 16); - *response++ = (uint8_t)(data >> 24); -#if (TIMESTAMP_CLOCK != 0U) - if (post_read) { - // Store Timestamp of next AP read - if ((request_value & DAP_TRANSFER_TIMESTAMP) != 0U) { - timestamp = DAP_Data.timestamp; - *response++ = (uint8_t) timestamp; - *response++ = (uint8_t)(timestamp >> 8); - *response++ = (uint8_t)(timestamp >> 16); - *response++ = (uint8_t)(timestamp >> 24); - } - } -#endif - } - if ((request_value & DAP_TRANSFER_MATCH_VALUE) != 0U) { - // Read with value match - match_value = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8) | - (uint32_t)(*(request+2) << 16) | - (uint32_t)(*(request+3) << 24); - request += 4; - match_retry = DAP_Data.transfer.match_retry; - if ((request_value & DAP_TRANSFER_APnDP) != 0U) { - // Post AP read - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(request_value, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } - } - do { - // Read register until its value matches or retry counter expires - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } - } while (((data & DAP_Data.transfer.match_mask) != match_value) && match_retry-- && !DAP_TransferAbort); - if ((data & DAP_Data.transfer.match_mask) != match_value) { - response_value |= DAP_TRANSFER_MISMATCH; - } - if (response_value != DAP_TRANSFER_OK) { - break; - } - } else { - // Normal read - retry = DAP_Data.transfer.retry_count; - if ((request_value & DAP_TRANSFER_APnDP) != 0U) { - // Read AP register - if (post_read == 0U) { - // Post AP read - do { - response_value = SWD_Transfer(request_value, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } -#if (TIMESTAMP_CLOCK != 0U) - // Store Timestamp - if ((request_value & DAP_TRANSFER_TIMESTAMP) != 0U) { - timestamp = DAP_Data.timestamp; - *response++ = (uint8_t) timestamp; - *response++ = (uint8_t)(timestamp >> 8); - *response++ = (uint8_t)(timestamp >> 16); - *response++ = (uint8_t)(timestamp >> 24); - } -#endif - post_read = 1U; - } - } else { - // Read DP register - do { - response_value = SWD_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } -#if (TIMESTAMP_CLOCK != 0U) - // Store Timestamp - if ((request_value & DAP_TRANSFER_TIMESTAMP) != 0U) { - timestamp = DAP_Data.timestamp; - *response++ = (uint8_t) timestamp; - *response++ = (uint8_t)(timestamp >> 8); - *response++ = (uint8_t)(timestamp >> 16); - *response++ = (uint8_t)(timestamp >> 24); - } -#endif - // Store data - *response++ = (uint8_t) data; - *response++ = (uint8_t)(data >> 8); - *response++ = (uint8_t)(data >> 16); - *response++ = (uint8_t)(data >> 24); - } - } - check_write = 0U; - } else { - // Write register - if (post_read) { - // Read previous data - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } - // Store previous data - *response++ = (uint8_t) data; - *response++ = (uint8_t)(data >> 8); - *response++ = (uint8_t)(data >> 16); - *response++ = (uint8_t)(data >> 24); - post_read = 0U; - } - // Load data - data = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8) | - (uint32_t)(*(request+2) << 16) | - (uint32_t)(*(request+3) << 24); - request += 4; - if ((request_value & DAP_TRANSFER_MATCH_MASK) != 0U) { - // Write match mask - DAP_Data.transfer.match_mask = data; - response_value = DAP_TRANSFER_OK; - } else { - // Write DP/AP register - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } -#if (TIMESTAMP_CLOCK != 0U) - // Store Timestamp - if ((request_value & DAP_TRANSFER_TIMESTAMP) != 0U) { - timestamp = DAP_Data.timestamp; - *response++ = (uint8_t) timestamp; - *response++ = (uint8_t)(timestamp >> 8); - *response++ = (uint8_t)(timestamp >> 16); - *response++ = (uint8_t)(timestamp >> 24); - } -#endif - check_write = 1U; - } - } - response_count++; - if (DAP_TransferAbort) { - break; - } - } - - for (; request_count != 0U; request_count--) { - // Process canceled requests - request_value = *request++; - if ((request_value & DAP_TRANSFER_RnW) != 0U) { - // Read register - if ((request_value & DAP_TRANSFER_MATCH_VALUE) != 0U) { - // Read with value match - request += 4; - } - } else { - // Write register - request += 4; - } - } - - if (response_value == DAP_TRANSFER_OK) { - if (post_read) { - // Read previous data - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - goto end; - } - // Store previous data - *response++ = (uint8_t) data; - *response++ = (uint8_t)(data >> 8); - *response++ = (uint8_t)(data >> 16); - *response++ = (uint8_t)(data >> 24); - } else if (check_write) { - // Check last write - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - } - } - -end: - *(response_head+0) = (uint8_t)response_count; - *(response_head+1) = (uint8_t)response_value; - - return (((uint32_t)(request - request_head) << 16) | (uint32_t)(response - response_head)); -} -#endif - - -// Process JTAG Transfer command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -#if (DAP_JTAG != 0) -static uint32_t DAP_JTAG_Transfer(const uint8_t *request, uint8_t *response) { - const - uint8_t *request_head; - uint32_t request_count; - uint32_t request_value; - uint32_t request_ir; - uint8_t *response_head; - uint32_t response_count; - uint32_t response_value; - uint32_t post_read; - uint32_t match_value; - uint32_t match_retry; - uint32_t retry; - uint32_t data; - uint32_t ir; -#if (TIMESTAMP_CLOCK != 0U) - uint32_t timestamp; -#endif - - request_head = request; - - response_count = 0U; - response_value = 0U; - response_head = response; - response += 2; - - DAP_TransferAbort = 0U; - - ir = 0U; - post_read = 0U; - - // Device index (JTAP TAP) - DAP_Data.jtag_dev.index = *request++; - if (DAP_Data.jtag_dev.index >= DAP_Data.jtag_dev.count) { - goto end; - } - - request_count = *request++; - - for (; request_count != 0U; request_count--) { - request_value = *request++; - request_ir = (request_value & DAP_TRANSFER_APnDP) ? JTAG_APACC : JTAG_DPACC; - if ((request_value & DAP_TRANSFER_RnW) != 0U) { - // Read register - if (post_read) { - // Read was posted before - retry = DAP_Data.transfer.retry_count; - if ((ir == request_ir) && ((request_value & DAP_TRANSFER_MATCH_VALUE) == 0U)) { - // Read previous data and post next read - do { - response_value = JTAG_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - } else { - // Select JTAG chain - if (ir != JTAG_DPACC) { - ir = JTAG_DPACC; - JTAG_IR(ir); - } - // Read previous data - do { - response_value = JTAG_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - post_read = 0U; - } - if (response_value != DAP_TRANSFER_OK) { - break; - } - // Store previous data - *response++ = (uint8_t) data; - *response++ = (uint8_t)(data >> 8); - *response++ = (uint8_t)(data >> 16); - *response++ = (uint8_t)(data >> 24); -#if (TIMESTAMP_CLOCK != 0U) - if (post_read) { - // Store Timestamp of next AP read - if ((request_value & DAP_TRANSFER_TIMESTAMP) != 0U) { - timestamp = DAP_Data.timestamp; - *response++ = (uint8_t) timestamp; - *response++ = (uint8_t)(timestamp >> 8); - *response++ = (uint8_t)(timestamp >> 16); - *response++ = (uint8_t)(timestamp >> 24); - } - } -#endif - } - if ((request_value & DAP_TRANSFER_MATCH_VALUE) != 0U) { - // Read with value match - match_value = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8) | - (uint32_t)(*(request+2) << 16) | - (uint32_t)(*(request+3) << 24); - request += 4; - match_retry = DAP_Data.transfer.match_retry; - // Select JTAG chain - if (ir != request_ir) { - ir = request_ir; - JTAG_IR(ir); - } - // Post DP/AP read - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(request_value, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } - do { - // Read register until its value matches or retry counter expires - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } - } while (((data & DAP_Data.transfer.match_mask) != match_value) && match_retry-- && !DAP_TransferAbort); - if ((data & DAP_Data.transfer.match_mask) != match_value) { - response_value |= DAP_TRANSFER_MISMATCH; - } - if (response_value != DAP_TRANSFER_OK) { - break; - } - } else { - // Normal read - if (post_read == 0U) { - // Select JTAG chain - if (ir != request_ir) { - ir = request_ir; - JTAG_IR(ir); - } - // Post DP/AP read - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(request_value, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } -#if (TIMESTAMP_CLOCK != 0U) - // Store Timestamp - if ((request_value & DAP_TRANSFER_TIMESTAMP) != 0U) { - timestamp = DAP_Data.timestamp; - *response++ = (uint8_t) timestamp; - *response++ = (uint8_t)(timestamp >> 8); - *response++ = (uint8_t)(timestamp >> 16); - *response++ = (uint8_t)(timestamp >> 24); - } -#endif - post_read = 1U; - } - } - } else { - // Write register - if (post_read) { - // Select JTAG chain - if (ir != JTAG_DPACC) { - ir = JTAG_DPACC; - JTAG_IR(ir); - } - // Read previous data - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } - // Store previous data - *response++ = (uint8_t) data; - *response++ = (uint8_t)(data >> 8); - *response++ = (uint8_t)(data >> 16); - *response++ = (uint8_t)(data >> 24); - post_read = 0U; - } - // Load data - data = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8) | - (uint32_t)(*(request+2) << 16) | - (uint32_t)(*(request+3) << 24); - request += 4; - if ((request_value & DAP_TRANSFER_MATCH_MASK) != 0U) { - // Write match mask - DAP_Data.transfer.match_mask = data; - response_value = DAP_TRANSFER_OK; - } else { - // Select JTAG chain - if (ir != request_ir) { - ir = request_ir; - JTAG_IR(ir); - } - // Write DP/AP register - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - break; - } -#if (TIMESTAMP_CLOCK != 0U) - // Store Timestamp - if ((request_value & DAP_TRANSFER_TIMESTAMP) != 0U) { - timestamp = DAP_Data.timestamp; - *response++ = (uint8_t) timestamp; - *response++ = (uint8_t)(timestamp >> 8); - *response++ = (uint8_t)(timestamp >> 16); - *response++ = (uint8_t)(timestamp >> 24); - } -#endif - } - } - response_count++; - if (DAP_TransferAbort) { - break; - } - } - - for (; request_count != 0U; request_count--) { - // Process canceled requests - request_value = *request++; - if ((request_value & DAP_TRANSFER_RnW) != 0U) { - // Read register - if ((request_value & DAP_TRANSFER_MATCH_VALUE) != 0U) { - // Read with value match - request += 4; - } - } else { - // Write register - request += 4; - } - } - - if (response_value == DAP_TRANSFER_OK) { - // Select JTAG chain - if (ir != JTAG_DPACC) { - ir = JTAG_DPACC; - JTAG_IR(ir); - } - if (post_read) { - // Read previous data - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - goto end; - } - // Store previous data - *response++ = (uint8_t) data; - *response++ = (uint8_t)(data >> 8); - *response++ = (uint8_t)(data >> 16); - *response++ = (uint8_t)(data >> 24); - } else { - // Check last write - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - } - } - -end: - *(response_head+0) = (uint8_t)response_count; - *(response_head+1) = (uint8_t)response_value; - - return (((uint32_t)(request - request_head) << 16) | (uint32_t)(response - response_head)); -} -#endif - - -// Process Dummy Transfer command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_Dummy_Transfer(const uint8_t *request, uint8_t *response) { - const - uint8_t *request_head; - uint32_t request_count; - uint32_t request_value; - - request_head = request; - - request++; // Ignore DAP index - - request_count = *request++; - - for (; request_count != 0U; request_count--) { - // Process dummy requests - request_value = *request++; - if ((request_value & DAP_TRANSFER_RnW) != 0U) { - // Read register - if ((request_value & DAP_TRANSFER_MATCH_VALUE) != 0U) { - // Read with value match - request += 4; - } - } else { - // Write register - request += 4; - } - } - - *(response+0) = 0U; // Response count - *(response+1) = 0U; // Response value - - return (((uint32_t)(request - request_head) << 16) | 2U); -} - - -// Process Transfer command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_Transfer(const uint8_t *request, uint8_t *response) { - uint32_t num; - - switch (DAP_Data.debug_port) { -#if (DAP_SWD != 0) - case DAP_PORT_SWD: - num = DAP_SWD_Transfer(request, response); - break; -#endif -#if (DAP_JTAG != 0) - case DAP_PORT_JTAG: - num = DAP_JTAG_Transfer(request, response); - break; -#endif - default: - num = DAP_Dummy_Transfer(request, response); - break; - } - - return (num); -} - - -// Process SWD Transfer Block command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response -#if (DAP_SWD != 0) -static uint32_t DAP_SWD_TransferBlock(const uint8_t *request, uint8_t *response) { - uint32_t request_count; - uint32_t request_value; - uint32_t response_count; - uint32_t response_value; - uint8_t *response_head; - uint32_t retry; - uint32_t data; - - response_count = 0U; - response_value = 0U; - response_head = response; - response += 3; - - DAP_TransferAbort = 0U; - - request++; // Ignore DAP index - - request_count = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8); - request += 2; - if (request_count == 0U) { - goto end; - } - - request_value = *request++; - if ((request_value & DAP_TRANSFER_RnW) != 0U) { - // Read register block - if ((request_value & DAP_TRANSFER_APnDP) != 0U) { - // Post AP read - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(request_value, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - goto end; - } - } - while (request_count--) { - // Read DP/AP register - if ((request_count == 0U) && ((request_value & DAP_TRANSFER_APnDP) != 0U)) { - // Last AP read - request_value = DP_RDBUFF | DAP_TRANSFER_RnW; - } - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - goto end; - } - // Store data - *response++ = (uint8_t) data; - *response++ = (uint8_t)(data >> 8); - *response++ = (uint8_t)(data >> 16); - *response++ = (uint8_t)(data >> 24); - response_count++; - } - } else { - // Write register block - while (request_count--) { - // Load data - data = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8) | - (uint32_t)(*(request+2) << 16) | - (uint32_t)(*(request+3) << 24); - request += 4; - // Write DP/AP register - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - goto end; - } - response_count++; - } - // Check last write - retry = DAP_Data.transfer.retry_count; - do { - response_value = SWD_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - } - -end: - *(response_head+0) = (uint8_t)(response_count >> 0); - *(response_head+1) = (uint8_t)(response_count >> 8); - *(response_head+2) = (uint8_t) response_value; - - return ((uint32_t)(response - response_head)); -} -#endif - - -// Process JTAG Transfer Block command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response -#if (DAP_JTAG != 0) -static uint32_t DAP_JTAG_TransferBlock(const uint8_t *request, uint8_t *response) { - uint32_t request_count; - uint32_t request_value; - uint32_t response_count; - uint32_t response_value; - uint8_t *response_head; - uint32_t retry; - uint32_t data; - uint32_t ir; - - response_count = 0U; - response_value = 0U; - response_head = response; - response += 3; - - DAP_TransferAbort = 0U; - - // Device index (JTAP TAP) - DAP_Data.jtag_dev.index = *request++; - if (DAP_Data.jtag_dev.index >= DAP_Data.jtag_dev.count) { - goto end; - } - - request_count = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8); - request += 2; - if (request_count == 0U) { - goto end; - } - - request_value = *request++; - - // Select JTAG chain - ir = (request_value & DAP_TRANSFER_APnDP) ? JTAG_APACC : JTAG_DPACC; - JTAG_IR(ir); - - if ((request_value & DAP_TRANSFER_RnW) != 0U) { - // Post read - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(request_value, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - goto end; - } - // Read register block - while (request_count--) { - // Read DP/AP register - if (request_count == 0U) { - // Last read - if (ir != JTAG_DPACC) { - JTAG_IR(JTAG_DPACC); - } - request_value = DP_RDBUFF | DAP_TRANSFER_RnW; - } - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - goto end; - } - // Store data - *response++ = (uint8_t) data; - *response++ = (uint8_t)(data >> 8); - *response++ = (uint8_t)(data >> 16); - *response++ = (uint8_t)(data >> 24); - response_count++; - } - } else { - // Write register block - while (request_count--) { - // Load data - data = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8) | - (uint32_t)(*(request+2) << 16) | - (uint32_t)(*(request+3) << 24); - request += 4; - // Write DP/AP register - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(request_value, &data); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - if (response_value != DAP_TRANSFER_OK) { - goto end; - } - response_count++; - } - // Check last write - if (ir != JTAG_DPACC) { - JTAG_IR(JTAG_DPACC); - } - retry = DAP_Data.transfer.retry_count; - do { - response_value = JTAG_Transfer(DP_RDBUFF | DAP_TRANSFER_RnW, NULL); - } while ((response_value == DAP_TRANSFER_WAIT) && retry-- && !DAP_TransferAbort); - } - -end: - *(response_head+0) = (uint8_t)(response_count >> 0); - *(response_head+1) = (uint8_t)(response_count >> 8); - *(response_head+2) = (uint8_t) response_value; - - return ((uint32_t)(response - response_head)); -} -#endif - - -// Process Transfer Block command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_TransferBlock(const uint8_t *request, uint8_t *response) { - uint32_t num; - - switch (DAP_Data.debug_port) { -#if (DAP_SWD != 0) - case DAP_PORT_SWD: - num = DAP_SWD_TransferBlock (request, response); - break; -#endif -#if (DAP_JTAG != 0) - case DAP_PORT_JTAG: - num = DAP_JTAG_TransferBlock(request, response); - break; -#endif - default: - *(response+0) = 0U; // Response count [7:0] - *(response+1) = 0U; // Response count[15:8] - *(response+2) = 0U; // Response value - num = 3U; - break; - } - - if ((*(request+3) & DAP_TRANSFER_RnW) != 0U) { - // Read register block - num |= 4U << 16; - } else { - // Write register block - num |= (4U + (((uint32_t)(*(request+1)) | (uint32_t)(*(request+2) << 8)) * 4)) << 16; - } - - return (num); -} - - -// Process SWD Write ABORT command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response -#if (DAP_SWD != 0) -static uint32_t DAP_SWD_WriteAbort(const uint8_t *request, uint8_t *response) { - uint32_t data; - - // Load data (Ignore DAP index) - data = (uint32_t)(*(request+1) << 0) | - (uint32_t)(*(request+2) << 8) | - (uint32_t)(*(request+3) << 16) | - (uint32_t)(*(request+4) << 24); - - // Write Abort register - SWD_Transfer(DP_ABORT, &data); - - *response = DAP_OK; - return (1U); -} -#endif - - -// Process JTAG Write ABORT command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response -#if (DAP_JTAG != 0) -static uint32_t DAP_JTAG_WriteAbort(const uint8_t *request, uint8_t *response) { - uint32_t data; - - // Device index (JTAP TAP) - DAP_Data.jtag_dev.index = *request; - if (DAP_Data.jtag_dev.index >= DAP_Data.jtag_dev.count) { - *response = DAP_ERROR; - return (1U); - } - - // Select JTAG chain - JTAG_IR(JTAG_ABORT); - - // Load data - data = (uint32_t)(*(request+1) << 0) | - (uint32_t)(*(request+2) << 8) | - (uint32_t)(*(request+3) << 16) | - (uint32_t)(*(request+4) << 24); - - // Write Abort register - JTAG_WriteAbort(data); - - *response = DAP_OK; - return (1U); -} -#endif - - -// Process Write ABORT command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -static uint32_t DAP_WriteAbort(const uint8_t *request, uint8_t *response) { - uint32_t num; - - switch (DAP_Data.debug_port) { -#if (DAP_SWD != 0) - case DAP_PORT_SWD: - num = DAP_SWD_WriteAbort (request, response); - break; -#endif -#if (DAP_JTAG != 0) - case DAP_PORT_JTAG: - num = DAP_JTAG_WriteAbort(request, response); - break; -#endif - default: - *response = DAP_ERROR; - num = 1U; - break; - } - return ((5U << 16) | num); -} - - -// Process DAP Vendor command request and prepare response -// Default function (can be overridden) -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -__WEAK uint32_t DAP_ProcessVendorCommand(const uint8_t *request, uint8_t *response) { - (void)request; - *response = ID_DAP_Invalid; - return ((1U << 16) | 1U); -} - - -// Process DAP command request and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t DAP_ProcessCommand(const uint8_t *request, uint8_t *response) { - uint32_t num; - - if ((*request >= ID_DAP_Vendor0) && (*request <= ID_DAP_Vendor31)) { - return DAP_ProcessVendorCommand(request, response); - } - - *response++ = *request; - - switch (*request++) { - case ID_DAP_Info: - num = DAP_Info(*request, response+1); - *response = (uint8_t)num; - return ((2U << 16) + 2U + num); - - case ID_DAP_HostStatus: - num = DAP_HostStatus(request, response); - break; - - case ID_DAP_Connect: - num = DAP_Connect(request, response); - break; - case ID_DAP_Disconnect: - num = DAP_Disconnect(response); - break; - - case ID_DAP_Delay: - num = DAP_Delay(request, response); - break; - - case ID_DAP_ResetTarget: - num = DAP_ResetTarget(response); - break; - - case ID_DAP_SWJ_Pins: - num = DAP_SWJ_Pins(request, response); - break; - case ID_DAP_SWJ_Clock: - num = DAP_SWJ_Clock(request, response); - break; - case ID_DAP_SWJ_Sequence: - num = DAP_SWJ_Sequence(request, response); - break; - - case ID_DAP_SWD_Configure: - num = DAP_SWD_Configure(request, response); - break; - case ID_DAP_SWD_Sequence: - num = DAP_SWD_Sequence(request, response); - break; - - case ID_DAP_JTAG_Sequence: - num = DAP_JTAG_Sequence(request, response); - break; - case ID_DAP_JTAG_Configure: - num = DAP_JTAG_Configure(request, response); - break; - case ID_DAP_JTAG_IDCODE: - num = DAP_JTAG_IDCode(request, response); - break; - - case ID_DAP_TransferConfigure: - num = DAP_TransferConfigure(request, response); - break; - case ID_DAP_Transfer: - num = DAP_Transfer(request, response); - break; - case ID_DAP_TransferBlock: - num = DAP_TransferBlock(request, response); - break; - - case ID_DAP_WriteABORT: - num = DAP_WriteAbort(request, response); - break; - -#if ((SWO_UART != 0) || (SWO_MANCHESTER != 0)) - case ID_DAP_SWO_Transport: - num = SWO_Transport(request, response); - break; - case ID_DAP_SWO_Mode: - num = SWO_Mode(request, response); - break; - case ID_DAP_SWO_Baudrate: - num = SWO_Baudrate(request, response); - break; - case ID_DAP_SWO_Control: - num = SWO_Control(request, response); - break; - case ID_DAP_SWO_Status: - num = SWO_Status(response); - break; - case ID_DAP_SWO_ExtendedStatus: - num = SWO_ExtendedStatus(request, response); - break; - case ID_DAP_SWO_Data: - num = SWO_Data(request, response); - break; -#endif - -#if (DAP_UART != 0) - case ID_DAP_UART_Transport: - num = UART_Transport(request, response); - break; - case ID_DAP_UART_Configure: - num = UART_Configure(request, response); - break; - case ID_DAP_UART_Control: - num = UART_Control(request, response); - break; - case ID_DAP_UART_Status: - num = UART_Status(response); - break; - case ID_DAP_UART_Transfer: - num = UART_Transfer(request, response); - break; -#endif - - default: - *(response-1) = ID_DAP_Invalid; - return ((1U << 16) | 1U); - } - - return ((1U << 16) + 1U + num); -} - - -// Execute DAP command (process request and prepare response) -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t DAP_ExecuteCommand(const uint8_t *request, uint8_t *response) { - uint32_t cnt, num, n; - - if (*request == ID_DAP_ExecuteCommands) { - *response++ = *request++; - cnt = *request++; - *response++ = (uint8_t)cnt; - num = (2U << 16) | 2U; - while (cnt--) { - n = DAP_ProcessCommand(request, response); - num += n; - request += (uint16_t)(n >> 16); - response += (uint16_t) n; - } - return (num); - } - - return DAP_ProcessCommand(request, response); -} - - -// Setup DAP -void DAP_Setup(void) { - - // Default settings - DAP_Data.debug_port = 0U; - DAP_Data.transfer.idle_cycles = 0U; - DAP_Data.transfer.retry_count = 100U; - DAP_Data.transfer.match_retry = 0U; - DAP_Data.transfer.match_mask = 0x00000000U; -#if (DAP_SWD != 0) - DAP_Data.swd_conf.turnaround = 1U; - DAP_Data.swd_conf.data_phase = 0U; -#endif -#if (DAP_JTAG != 0) - DAP_Data.jtag_dev.count = 0U; -#endif - - // Sets DAP_Data.fast_clock and DAP_Data.clock_delay. - Set_Clock_Delay(DAP_DEFAULT_SWJ_CLOCK); - - DAP_SETUP(); // Device specific setup -} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/DAP_vendor.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/DAP_vendor.c deleted file mode 100644 index 4f2477a..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/DAP_vendor.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2013-2017 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 1. December 2017 - * $Revision: V2.0.0 - * - * Project: CMSIS-DAP Source - * Title: DAP_vendor.c CMSIS-DAP Vendor Commands - * - *---------------------------------------------------------------------------*/ - -#include "DAP_config.h" -#include "DAP.h" - -//************************************************************************************************** -/** -\defgroup DAP_Vendor_Adapt_gr Adapt Vendor Commands -\ingroup DAP_Vendor_gr -@{ - -The file DAP_vendor.c provides template source code for extension of a Debug Unit with -Vendor Commands. Copy this file to the project folder of the Debug Unit and add the -file to the MDK-ARM project under the file group Configuration. -*/ - -/** Process DAP Vendor Command and prepare Response Data -\param request pointer to request data -\param response pointer to response data -\return number of bytes in response (lower 16 bits) - number of bytes in request (upper 16 bits) -*/ -uint32_t DAP_ProcessVendorCommand(const uint8_t *request, uint8_t *response) { - uint32_t num = (1U << 16) | 1U; - - *response++ = *request; // copy Command ID - - switch (*request++) { // first byte in request is Command ID - case ID_DAP_Vendor0: -#if 0 // example user command - num += 1U << 16; // increment request count - if (*request == 1U) { // when first command data byte is 1 - *response++ = 'X'; // send 'X' as response - num++; // increment response count - } -#endif - break; - - case ID_DAP_Vendor1: break; - case ID_DAP_Vendor2: break; - case ID_DAP_Vendor3: break; - case ID_DAP_Vendor4: break; - case ID_DAP_Vendor5: break; - case ID_DAP_Vendor6: break; - case ID_DAP_Vendor7: break; - case ID_DAP_Vendor8: break; - case ID_DAP_Vendor9: break; - case ID_DAP_Vendor10: break; - case ID_DAP_Vendor11: break; - case ID_DAP_Vendor12: break; - case ID_DAP_Vendor13: break; - case ID_DAP_Vendor14: break; - case ID_DAP_Vendor15: break; - case ID_DAP_Vendor16: break; - case ID_DAP_Vendor17: break; - case ID_DAP_Vendor18: break; - case ID_DAP_Vendor19: break; - case ID_DAP_Vendor20: break; - case ID_DAP_Vendor21: break; - case ID_DAP_Vendor22: break; - case ID_DAP_Vendor23: break; - case ID_DAP_Vendor24: break; - case ID_DAP_Vendor25: break; - case ID_DAP_Vendor26: break; - case ID_DAP_Vendor27: break; - case ID_DAP_Vendor28: break; - case ID_DAP_Vendor29: break; - case ID_DAP_Vendor30: break; - case ID_DAP_Vendor31: break; - } - - return (num); -} - -///@} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/JTAG_DP.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/JTAG_DP.c deleted file mode 100644 index 24b1f3f..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/JTAG_DP.c +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Copyright (c) 2013-2017 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 1. December 2017 - * $Revision: V2.0.0 - * - * Project: CMSIS-DAP Source - * Title: JTAG_DP.c CMSIS-DAP JTAG DP I/O - * - *---------------------------------------------------------------------------*/ - -#include "DAP_config.h" -#include "DAP.h" - - -// JTAG Macros - -#define PIN_TCK_SET PIN_SWCLK_TCK_SET -#define PIN_TCK_CLR PIN_SWCLK_TCK_CLR -#define PIN_TMS_SET PIN_SWDIO_TMS_SET -#define PIN_TMS_CLR PIN_SWDIO_TMS_CLR - -#define JTAG_CYCLE_TCK() \ - PIN_TCK_CLR(); \ - PIN_DELAY(); \ - PIN_TCK_SET(); \ - PIN_DELAY() - -#define JTAG_CYCLE_TDI(tdi) \ - PIN_TDI_OUT(tdi); \ - PIN_TCK_CLR(); \ - PIN_DELAY(); \ - PIN_TCK_SET(); \ - PIN_DELAY() - -#define JTAG_CYCLE_TDO(tdo) \ - PIN_TCK_CLR(); \ - PIN_DELAY(); \ - tdo = PIN_TDO_IN(); \ - PIN_TCK_SET(); \ - PIN_DELAY() - -#define JTAG_CYCLE_TDIO(tdi,tdo) \ - PIN_TDI_OUT(tdi); \ - PIN_TCK_CLR(); \ - PIN_DELAY(); \ - tdo = PIN_TDO_IN(); \ - PIN_TCK_SET(); \ - PIN_DELAY() - -#define PIN_DELAY() PIN_DELAY_SLOW(DAP_Data.clock_delay) - - -#if (DAP_JTAG != 0) - - -// Generate JTAG Sequence -// info: sequence information -// tdi: pointer to TDI generated data -// tdo: pointer to TDO captured data -// return: none -void JTAG_Sequence (uint32_t info, const uint8_t *tdi, uint8_t *tdo) { - uint32_t i_val; - uint32_t o_val; - uint32_t bit; - uint32_t n, k; - - n = info & JTAG_SEQUENCE_TCK; - if (n == 0U) { - n = 64U; - } - - if (info & JTAG_SEQUENCE_TMS) { - PIN_TMS_SET(); - } else { - PIN_TMS_CLR(); - } - - while (n) { - i_val = *tdi++; - o_val = 0U; - for (k = 8U; k && n; k--, n--) { - JTAG_CYCLE_TDIO(i_val, bit); - i_val >>= 1; - o_val >>= 1; - o_val |= bit << 7; - } - o_val >>= k; - if (info & JTAG_SEQUENCE_TDO) { - *tdo++ = (uint8_t)o_val; - } - } -} - - -// JTAG Set IR -// ir: IR value -// return: none -#define JTAG_IR_Function(speed) /**/ \ -static void JTAG_IR_##speed (uint32_t ir) { \ - uint32_t n; \ - \ - PIN_TMS_SET(); \ - JTAG_CYCLE_TCK(); /* Select-DR-Scan */ \ - JTAG_CYCLE_TCK(); /* Select-IR-Scan */ \ - PIN_TMS_CLR(); \ - JTAG_CYCLE_TCK(); /* Capture-IR */ \ - JTAG_CYCLE_TCK(); /* Shift-IR */ \ - \ - PIN_TDI_OUT(1U); \ - for (n = DAP_Data.jtag_dev.ir_before[DAP_Data.jtag_dev.index]; n; n--) { \ - JTAG_CYCLE_TCK(); /* Bypass before data */ \ - } \ - for (n = DAP_Data.jtag_dev.ir_length[DAP_Data.jtag_dev.index] - 1U; n; n--) { \ - JTAG_CYCLE_TDI(ir); /* Set IR bits (except last) */ \ - ir >>= 1; \ - } \ - n = DAP_Data.jtag_dev.ir_after[DAP_Data.jtag_dev.index]; \ - if (n) { \ - JTAG_CYCLE_TDI(ir); /* Set last IR bit */ \ - PIN_TDI_OUT(1U); \ - for (--n; n; n--) { \ - JTAG_CYCLE_TCK(); /* Bypass after data */ \ - } \ - PIN_TMS_SET(); \ - JTAG_CYCLE_TCK(); /* Bypass & Exit1-IR */ \ - } else { \ - PIN_TMS_SET(); \ - JTAG_CYCLE_TDI(ir); /* Set last IR bit & Exit1-IR */ \ - } \ - \ - JTAG_CYCLE_TCK(); /* Update-IR */ \ - PIN_TMS_CLR(); \ - JTAG_CYCLE_TCK(); /* Idle */ \ - PIN_TDI_OUT(1U); \ -} - - -// JTAG Transfer I/O -// request: A[3:2] RnW APnDP -// data: DATA[31:0] -// return: ACK[2:0] -#define JTAG_TransferFunction(speed) /**/ \ -static uint8_t JTAG_Transfer##speed (uint32_t request, uint32_t *data) { \ - uint32_t ack; \ - uint32_t bit; \ - uint32_t val; \ - uint32_t n; \ - \ - PIN_TMS_SET(); \ - JTAG_CYCLE_TCK(); /* Select-DR-Scan */ \ - PIN_TMS_CLR(); \ - JTAG_CYCLE_TCK(); /* Capture-DR */ \ - JTAG_CYCLE_TCK(); /* Shift-DR */ \ - \ - for (n = DAP_Data.jtag_dev.index; n; n--) { \ - JTAG_CYCLE_TCK(); /* Bypass before data */ \ - } \ - \ - JTAG_CYCLE_TDIO(request >> 1, bit); /* Set RnW, Get ACK.0 */ \ - ack = bit << 1; \ - JTAG_CYCLE_TDIO(request >> 2, bit); /* Set A2, Get ACK.1 */ \ - ack |= bit << 0; \ - JTAG_CYCLE_TDIO(request >> 3, bit); /* Set A3, Get ACK.2 */ \ - ack |= bit << 2; \ - \ - if (ack != DAP_TRANSFER_OK) { \ - /* Exit on error */ \ - PIN_TMS_SET(); \ - JTAG_CYCLE_TCK(); /* Exit1-DR */ \ - goto exit; \ - } \ - \ - if (request & DAP_TRANSFER_RnW) { \ - /* Read Transfer */ \ - val = 0U; \ - for (n = 31U; n; n--) { \ - JTAG_CYCLE_TDO(bit); /* Get D0..D30 */ \ - val |= bit << 31; \ - val >>= 1; \ - } \ - n = DAP_Data.jtag_dev.count - DAP_Data.jtag_dev.index - 1U; \ - if (n) { \ - JTAG_CYCLE_TDO(bit); /* Get D31 */ \ - for (--n; n; n--) { \ - JTAG_CYCLE_TCK(); /* Bypass after data */ \ - } \ - PIN_TMS_SET(); \ - JTAG_CYCLE_TCK(); /* Bypass & Exit1-DR */ \ - } else { \ - PIN_TMS_SET(); \ - JTAG_CYCLE_TDO(bit); /* Get D31 & Exit1-DR */ \ - } \ - val |= bit << 31; \ - if (data) { *data = val; } \ - } else { \ - /* Write Transfer */ \ - val = *data; \ - for (n = 31U; n; n--) { \ - JTAG_CYCLE_TDI(val); /* Set D0..D30 */ \ - val >>= 1; \ - } \ - n = DAP_Data.jtag_dev.count - DAP_Data.jtag_dev.index - 1U; \ - if (n) { \ - JTAG_CYCLE_TDI(val); /* Set D31 */ \ - for (--n; n; n--) { \ - JTAG_CYCLE_TCK(); /* Bypass after data */ \ - } \ - PIN_TMS_SET(); \ - JTAG_CYCLE_TCK(); /* Bypass & Exit1-DR */ \ - } else { \ - PIN_TMS_SET(); \ - JTAG_CYCLE_TDI(val); /* Set D31 & Exit1-DR */ \ - } \ - } \ - \ -exit: \ - JTAG_CYCLE_TCK(); /* Update-DR */ \ - PIN_TMS_CLR(); \ - JTAG_CYCLE_TCK(); /* Idle */ \ - PIN_TDI_OUT(1U); \ - \ - /* Capture Timestamp */ \ - if (request & DAP_TRANSFER_TIMESTAMP) { \ - DAP_Data.timestamp = TIMESTAMP_GET(); \ - } \ - \ - /* Idle cycles */ \ - n = DAP_Data.transfer.idle_cycles; \ - while (n--) { \ - JTAG_CYCLE_TCK(); /* Idle */ \ - } \ - \ - return ((uint8_t)ack); \ -} - - -#undef PIN_DELAY -#define PIN_DELAY() PIN_DELAY_FAST() -JTAG_IR_Function(Fast) -JTAG_TransferFunction(Fast) - -#undef PIN_DELAY -#define PIN_DELAY() PIN_DELAY_SLOW(DAP_Data.clock_delay) -JTAG_IR_Function(Slow) -JTAG_TransferFunction(Slow) - - -// JTAG Read IDCODE register -// return: value read -uint32_t JTAG_ReadIDCode (void) { - uint32_t bit; - uint32_t val; - uint32_t n; - - PIN_TMS_SET(); - JTAG_CYCLE_TCK(); /* Select-DR-Scan */ - PIN_TMS_CLR(); - JTAG_CYCLE_TCK(); /* Capture-DR */ - JTAG_CYCLE_TCK(); /* Shift-DR */ - - for (n = DAP_Data.jtag_dev.index; n; n--) { - JTAG_CYCLE_TCK(); /* Bypass before data */ - } - - val = 0U; - for (n = 31U; n; n--) { - JTAG_CYCLE_TDO(bit); /* Get D0..D30 */ - val |= bit << 31; - val >>= 1; - } - PIN_TMS_SET(); - JTAG_CYCLE_TDO(bit); /* Get D31 & Exit1-DR */ - val |= bit << 31; - - JTAG_CYCLE_TCK(); /* Update-DR */ - PIN_TMS_CLR(); - JTAG_CYCLE_TCK(); /* Idle */ - - return (val); -} - - -// JTAG Write ABORT register -// data: value to write -// return: none -void JTAG_WriteAbort (uint32_t data) { - uint32_t n; - - PIN_TMS_SET(); - JTAG_CYCLE_TCK(); /* Select-DR-Scan */ - PIN_TMS_CLR(); - JTAG_CYCLE_TCK(); /* Capture-DR */ - JTAG_CYCLE_TCK(); /* Shift-DR */ - - for (n = DAP_Data.jtag_dev.index; n; n--) { - JTAG_CYCLE_TCK(); /* Bypass before data */ - } - - PIN_TDI_OUT(0U); - JTAG_CYCLE_TCK(); /* Set RnW=0 (Write) */ - JTAG_CYCLE_TCK(); /* Set A2=0 */ - JTAG_CYCLE_TCK(); /* Set A3=0 */ - - for (n = 31U; n; n--) { - JTAG_CYCLE_TDI(data); /* Set D0..D30 */ - data >>= 1; - } - n = DAP_Data.jtag_dev.count - DAP_Data.jtag_dev.index - 1U; - if (n) { - JTAG_CYCLE_TDI(data); /* Set D31 */ - for (--n; n; n--) { - JTAG_CYCLE_TCK(); /* Bypass after data */ - } - PIN_TMS_SET(); - JTAG_CYCLE_TCK(); /* Bypass & Exit1-DR */ - } else { - PIN_TMS_SET(); - JTAG_CYCLE_TDI(data); /* Set D31 & Exit1-DR */ - } - - JTAG_CYCLE_TCK(); /* Update-DR */ - PIN_TMS_CLR(); - JTAG_CYCLE_TCK(); /* Idle */ - PIN_TDI_OUT(1U); -} - - -// JTAG Set IR -// ir: IR value -// return: none -void JTAG_IR (uint32_t ir) { - if (DAP_Data.fast_clock) { - JTAG_IR_Fast(ir); - } else { - JTAG_IR_Slow(ir); - } -} - - -// JTAG Transfer I/O -// request: A[3:2] RnW APnDP -// data: DATA[31:0] -// return: ACK[2:0] -uint8_t JTAG_Transfer(uint32_t request, uint32_t *data) { - if (DAP_Data.fast_clock) { - return JTAG_TransferFast(request, data); - } else { - return JTAG_TransferSlow(request, data); - } -} - - -#endif /* (DAP_JTAG != 0) */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/SWO.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/SWO.c deleted file mode 100644 index 4a850cf..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/SWO.c +++ /dev/null @@ -1,798 +0,0 @@ -/* - * Copyright (c) 2013-2021 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 29. March 2021 - * $Revision: V2.0.1 - * - * Project: CMSIS-DAP Source - * Title: SWO.c CMSIS-DAP SWO I/O - * - *---------------------------------------------------------------------------*/ - -#include "DAP_config.h" -#include "DAP.h" -#if (SWO_UART != 0) -#include "Driver_USART.h" -#endif -#if (SWO_STREAM != 0) -#include "cmsis_os2.h" -#define osObjectsExternal -#include "osObjects.h" -#endif - -#if (SWO_STREAM != 0) -#ifdef DAP_FW_V1 -#error "SWO Streaming Trace not supported in DAP V1!" -#endif -#endif - -#if (SWO_UART != 0) - -// USART Driver -#define _USART_Driver_(n) Driver_USART##n -#define USART_Driver_(n) _USART_Driver_(n) -extern ARM_DRIVER_USART USART_Driver_(SWO_UART_DRIVER); -#define pUSART (&USART_Driver_(SWO_UART_DRIVER)) - -static uint8_t USART_Ready = 0U; - -#endif /* (SWO_UART != 0) */ - - -#if ((SWO_UART != 0) || (SWO_MANCHESTER != 0)) - - -#define SWO_STREAM_TIMEOUT 50U /* Stream timeout in ms */ - -#define USB_BLOCK_SIZE 512U /* USB Block Size */ -#define TRACE_BLOCK_SIZE 64U /* Trace Block Size (2^n: 32...512) */ - -// Trace State -static uint8_t TraceTransport = 0U; /* Trace Transport */ -static uint8_t TraceMode = 0U; /* Trace Mode */ -static uint8_t TraceStatus = 0U; /* Trace Status without Errors */ -static uint8_t TraceError[2] = {0U, 0U}; /* Trace Error flags (banked) */ -static uint8_t TraceError_n = 0U; /* Active Trace Error bank */ - -// Trace Buffer -static uint8_t TraceBuf[SWO_BUFFER_SIZE]; /* Trace Buffer (must be 2^n) */ -static volatile uint32_t TraceIndexI = 0U; /* Incoming Trace Index */ -static volatile uint32_t TraceIndexO = 0U; /* Outgoing Trace Index */ -static volatile uint8_t TraceUpdate; /* Trace Update Flag */ -static uint32_t TraceBlockSize; /* Current Trace Block Size */ - -#if (TIMESTAMP_CLOCK != 0U) -// Trace Timestamp -static volatile struct { - uint32_t index; - uint32_t tick; -} TraceTimestamp; -#endif - -// Trace Helper functions -static void ClearTrace (void); -static void ResumeTrace (void); -static uint32_t GetTraceCount (void); -static uint8_t GetTraceStatus (void); -static void SetTraceError (uint8_t flag); - -#if (SWO_STREAM != 0) -extern osThreadId_t SWO_ThreadId; -static volatile uint8_t TransferBusy = 0U; /* Transfer Busy Flag */ -static uint32_t TransferSize; /* Current Transfer Size */ -#endif - - -#if (SWO_UART != 0) - -// USART Driver Callback function -// event: event mask -static void USART_Callback (uint32_t event) { - uint32_t index_i; - uint32_t index_o; - uint32_t count; - uint32_t num; - - if (event & ARM_USART_EVENT_RECEIVE_COMPLETE) { -#if (TIMESTAMP_CLOCK != 0U) - TraceTimestamp.tick = TIMESTAMP_GET(); -#endif - index_o = TraceIndexO; - index_i = TraceIndexI; - index_i += TraceBlockSize; - TraceIndexI = index_i; -#if (TIMESTAMP_CLOCK != 0U) - TraceTimestamp.index = index_i; -#endif - num = TRACE_BLOCK_SIZE - (index_i & (TRACE_BLOCK_SIZE - 1U)); - count = index_i - index_o; - if (count <= (SWO_BUFFER_SIZE - num)) { - index_i &= SWO_BUFFER_SIZE - 1U; - TraceBlockSize = num; - pUSART->Receive(&TraceBuf[index_i], num); - } else { - TraceStatus = DAP_SWO_CAPTURE_ACTIVE | DAP_SWO_CAPTURE_PAUSED; - } - TraceUpdate = 1U; -#if (SWO_STREAM != 0) - if (TraceTransport == 2U) { - if (count >= (USB_BLOCK_SIZE - (index_o & (USB_BLOCK_SIZE - 1U)))) { - osThreadFlagsSet(SWO_ThreadId, 1U); - } - } -#endif - } - if (event & ARM_USART_EVENT_RX_OVERFLOW) { - SetTraceError(DAP_SWO_BUFFER_OVERRUN); - } - if (event & (ARM_USART_EVENT_RX_BREAK | - ARM_USART_EVENT_RX_FRAMING_ERROR | - ARM_USART_EVENT_RX_PARITY_ERROR)) { - SetTraceError(DAP_SWO_STREAM_ERROR); - } -} - -// Enable or disable SWO Mode (UART) -// enable: enable flag -// return: 1 - Success, 0 - Error -__WEAK uint32_t SWO_Mode_UART (uint32_t enable) { - int32_t status; - - USART_Ready = 0U; - - if (enable != 0U) { - status = pUSART->Initialize(USART_Callback); - if (status != ARM_DRIVER_OK) { - return (0U); - } - status = pUSART->PowerControl(ARM_POWER_FULL); - if (status != ARM_DRIVER_OK) { - pUSART->Uninitialize(); - return (0U); - } - } else { - pUSART->Control(ARM_USART_CONTROL_RX, 0U); - pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U); - pUSART->PowerControl(ARM_POWER_OFF); - pUSART->Uninitialize(); - } - return (1U); -} - -// Configure SWO Baudrate (UART) -// baudrate: requested baudrate -// return: actual baudrate or 0 when not configured -__WEAK uint32_t SWO_Baudrate_UART (uint32_t baudrate) { - int32_t status; - uint32_t index; - uint32_t num; - - if (baudrate > SWO_UART_MAX_BAUDRATE) { - baudrate = SWO_UART_MAX_BAUDRATE; - } - - if (TraceStatus & DAP_SWO_CAPTURE_ACTIVE) { - pUSART->Control(ARM_USART_CONTROL_RX, 0U); - if (pUSART->GetStatus().rx_busy) { - TraceIndexI += pUSART->GetRxCount(); - pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U); - } - } - - status = pUSART->Control(ARM_USART_MODE_ASYNCHRONOUS | - ARM_USART_DATA_BITS_8 | - ARM_USART_PARITY_NONE | - ARM_USART_STOP_BITS_1, - baudrate); - - if (status == ARM_DRIVER_OK) { - USART_Ready = 1U; - } else { - USART_Ready = 0U; - return (0U); - } - - if (TraceStatus & DAP_SWO_CAPTURE_ACTIVE) { - if ((TraceStatus & DAP_SWO_CAPTURE_PAUSED) == 0U) { - index = TraceIndexI & (SWO_BUFFER_SIZE - 1U); - num = TRACE_BLOCK_SIZE - (index & (TRACE_BLOCK_SIZE - 1U)); - TraceBlockSize = num; - pUSART->Receive(&TraceBuf[index], num); - } - pUSART->Control(ARM_USART_CONTROL_RX, 1U); - } - - return (baudrate); -} - -// Control SWO Capture (UART) -// active: active flag -// return: 1 - Success, 0 - Error -__WEAK uint32_t SWO_Control_UART (uint32_t active) { - int32_t status; - - if (active) { - if (!USART_Ready) { - return (0U); - } - TraceBlockSize = 1U; - status = pUSART->Receive(&TraceBuf[0], 1U); - if (status != ARM_DRIVER_OK) { - return (0U); - } - status = pUSART->Control(ARM_USART_CONTROL_RX, 1U); - if (status != ARM_DRIVER_OK) { - return (0U); - } - } else { - pUSART->Control(ARM_USART_CONTROL_RX, 0U); - if (pUSART->GetStatus().rx_busy) { - TraceIndexI += pUSART->GetRxCount(); - pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U); - } - } - return (1U); -} - -// Start SWO Capture (UART) -// buf: pointer to buffer for capturing -// num: number of bytes to capture -__WEAK void SWO_Capture_UART (uint8_t *buf, uint32_t num) { - TraceBlockSize = num; - pUSART->Receive(buf, num); -} - -// Get SWO Pending Trace Count (UART) -// return: number of pending trace data bytes -__WEAK uint32_t SWO_GetCount_UART (void) { - uint32_t count; - - if (pUSART->GetStatus().rx_busy) { - count = pUSART->GetRxCount(); - } else { - count = 0U; - } - return (count); -} - -#endif /* (SWO_UART != 0) */ - - -#if (SWO_MANCHESTER != 0) - -// Enable or disable SWO Mode (Manchester) -// enable: enable flag -// return: 1 - Success, 0 - Error -__WEAK uint32_t SWO_Mode_Manchester (uint32_t enable) { - return (0U); -} - -// Configure SWO Baudrate (Manchester) -// baudrate: requested baudrate -// return: actual baudrate or 0 when not configured -__WEAK uint32_t SWO_Baudrate_Manchester (uint32_t baudrate) { - return (0U); -} - -// Control SWO Capture (Manchester) -// active: active flag -// return: 1 - Success, 0 - Error -__WEAK uint32_t SWO_Control_Manchester (uint32_t active) { - return (0U); -} - -// Start SWO Capture (Manchester) -// buf: pointer to buffer for capturing -// num: number of bytes to capture -__WEAK void SWO_Capture_Manchester (uint8_t *buf, uint32_t num) { -} - -// Get SWO Pending Trace Count (Manchester) -// return: number of pending trace data bytes -__WEAK uint32_t SWO_GetCount_Manchester (void) { -} - -#endif /* (SWO_MANCHESTER != 0) */ - - -// Clear Trace Errors and Data -static void ClearTrace (void) { - -#if (SWO_STREAM != 0) - if (TraceTransport == 2U) { - if (TransferBusy != 0U) { - SWO_AbortTransfer(); - TransferBusy = 0U; - } - } -#endif - - TraceError[0] = 0U; - TraceError[1] = 0U; - TraceError_n = 0U; - TraceIndexI = 0U; - TraceIndexO = 0U; - -#if (TIMESTAMP_CLOCK != 0U) - TraceTimestamp.index = 0U; - TraceTimestamp.tick = 0U; -#endif -} - -// Resume Trace Capture -static void ResumeTrace (void) { - uint32_t index_i; - uint32_t index_o; - - if (TraceStatus == (DAP_SWO_CAPTURE_ACTIVE | DAP_SWO_CAPTURE_PAUSED)) { - index_i = TraceIndexI; - index_o = TraceIndexO; - if ((index_i - index_o) < SWO_BUFFER_SIZE) { - index_i &= SWO_BUFFER_SIZE - 1U; - switch (TraceMode) { -#if (SWO_UART != 0) - case DAP_SWO_UART: - TraceStatus = DAP_SWO_CAPTURE_ACTIVE; - SWO_Capture_UART(&TraceBuf[index_i], 1U); - break; -#endif -#if (SWO_MANCHESTER != 0) - case DAP_SWO_MANCHESTER: - TraceStatus = DAP_SWO_CAPTURE_ACTIVE; - SWO_Capture_Manchester(&TraceBuf[index_i], 1U); - break; -#endif - default: - break; - } - } - } -} - -// Get Trace Count -// return: number of available data bytes in trace buffer -static uint32_t GetTraceCount (void) { - uint32_t count; - - if (TraceStatus == DAP_SWO_CAPTURE_ACTIVE) { - do { - TraceUpdate = 0U; - count = TraceIndexI - TraceIndexO; - switch (TraceMode) { -#if (SWO_UART != 0) - case DAP_SWO_UART: - count += SWO_GetCount_UART(); - break; -#endif -#if (SWO_MANCHESTER != 0) - case DAP_SWO_MANCHESTER: - count += SWO_GetCount_Manchester(); - break; -#endif - default: - break; - } - } while (TraceUpdate != 0U); - } else { - count = TraceIndexI - TraceIndexO; - } - - return (count); -} - -// Get Trace Status (clear Error flags) -// return: Trace Status (Active flag and Error flags) -static uint8_t GetTraceStatus (void) { - uint8_t status; - uint32_t n; - - n = TraceError_n; - TraceError_n ^= 1U; - status = TraceStatus | TraceError[n]; - TraceError[n] = 0U; - - return (status); -} - -// Set Trace Error flag(s) -// flag: error flag(s) to set -static void SetTraceError (uint8_t flag) { - TraceError[TraceError_n] |= flag; -} - - -// Process SWO Transport command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t SWO_Transport (const uint8_t *request, uint8_t *response) { - uint8_t transport; - uint32_t result; - - if ((TraceStatus & DAP_SWO_CAPTURE_ACTIVE) == 0U) { - transport = *request; - switch (transport) { - case 0U: - case 1U: -#if (SWO_STREAM != 0) - case 2U: -#endif - TraceTransport = transport; - result = 1U; - break; - default: - result = 0U; - break; - } - } else { - result = 0U; - } - - if (result != 0U) { - *response = DAP_OK; - } else { - *response = DAP_ERROR; - } - - return ((1U << 16) | 1U); -} - - -// Process SWO Mode command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t SWO_Mode (const uint8_t *request, uint8_t *response) { - uint8_t mode; - uint32_t result; - - mode = *request; - - switch (TraceMode) { -#if (SWO_UART != 0) - case DAP_SWO_UART: - SWO_Mode_UART(0U); - break; -#endif -#if (SWO_MANCHESTER != 0) - case DAP_SWO_MANCHESTER: - SWO_Mode_Manchester(0U); - break; -#endif - default: - break; - } - - switch (mode) { - case DAP_SWO_OFF: - result = 1U; - break; -#if (SWO_UART != 0) - case DAP_SWO_UART: - result = SWO_Mode_UART(1U); - break; -#endif -#if (SWO_MANCHESTER != 0) - case DAP_SWO_MANCHESTER: - result = SWO_Mode_Manchester(1U); - break; -#endif - default: - result = 0U; - break; - } - if (result != 0U) { - TraceMode = mode; - } else { - TraceMode = DAP_SWO_OFF; - } - - TraceStatus = 0U; - - if (result != 0U) { - *response = DAP_OK; - } else { - *response = DAP_ERROR; - } - - return ((1U << 16) | 1U); -} - - -// Process SWO Baudrate command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t SWO_Baudrate (const uint8_t *request, uint8_t *response) { - uint32_t baudrate; - - baudrate = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8) | - (uint32_t)(*(request+2) << 16) | - (uint32_t)(*(request+3) << 24); - - switch (TraceMode) { -#if (SWO_UART != 0) - case DAP_SWO_UART: - baudrate = SWO_Baudrate_UART(baudrate); - break; -#endif -#if (SWO_MANCHESTER != 0) - case DAP_SWO_MANCHESTER: - baudrate = SWO_Baudrate_Manchester(baudrate); - break; -#endif - default: - baudrate = 0U; - break; - } - - if (baudrate == 0U) { - TraceStatus = 0U; - } - - *response++ = (uint8_t)(baudrate >> 0); - *response++ = (uint8_t)(baudrate >> 8); - *response++ = (uint8_t)(baudrate >> 16); - *response = (uint8_t)(baudrate >> 24); - - return ((4U << 16) | 4U); -} - - -// Process SWO Control command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t SWO_Control (const uint8_t *request, uint8_t *response) { - uint8_t active; - uint32_t result; - - active = *request & DAP_SWO_CAPTURE_ACTIVE; - - if (active != (TraceStatus & DAP_SWO_CAPTURE_ACTIVE)) { - if (active) { - ClearTrace(); - } - switch (TraceMode) { -#if (SWO_UART != 0) - case DAP_SWO_UART: - result = SWO_Control_UART(active); - break; -#endif -#if (SWO_MANCHESTER != 0) - case DAP_SWO_MANCHESTER: - result = SWO_Control_Manchester(active); - break; -#endif - default: - result = 0U; - break; - } - if (result != 0U) { - TraceStatus = active; -#if (SWO_STREAM != 0) - if (TraceTransport == 2U) { - osThreadFlagsSet(SWO_ThreadId, 1U); - } -#endif - } - } else { - result = 1U; - } - - if (result != 0U) { - *response = DAP_OK; - } else { - *response = DAP_ERROR; - } - - return ((1U << 16) | 1U); -} - - -// Process SWO Status command and prepare response -// response: pointer to response data -// return: number of bytes in response -uint32_t SWO_Status (uint8_t *response) { - uint8_t status; - uint32_t count; - - status = GetTraceStatus(); - count = GetTraceCount(); - - *response++ = status; - *response++ = (uint8_t)(count >> 0); - *response++ = (uint8_t)(count >> 8); - *response++ = (uint8_t)(count >> 16); - *response = (uint8_t)(count >> 24); - - return (5U); -} - - -// Process SWO Extended Status command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t SWO_ExtendedStatus (const uint8_t *request, uint8_t *response) { - uint8_t cmd; - uint8_t status; - uint32_t count; -#if (TIMESTAMP_CLOCK != 0U) - uint32_t index; - uint32_t tick; -#endif - uint32_t num; - - num = 0U; - cmd = *request; - - if (cmd & 0x01U) { - status = GetTraceStatus(); - *response++ = status; - num += 1U; - } - - if (cmd & 0x02U) { - count = GetTraceCount(); - *response++ = (uint8_t)(count >> 0); - *response++ = (uint8_t)(count >> 8); - *response++ = (uint8_t)(count >> 16); - *response++ = (uint8_t)(count >> 24); - num += 4U; - } - -#if (TIMESTAMP_CLOCK != 0U) - if (cmd & 0x04U) { - do { - TraceUpdate = 0U; - index = TraceTimestamp.index; - tick = TraceTimestamp.tick; - } while (TraceUpdate != 0U); - *response++ = (uint8_t)(index >> 0); - *response++ = (uint8_t)(index >> 8); - *response++ = (uint8_t)(index >> 16); - *response++ = (uint8_t)(index >> 24); - *response++ = (uint8_t)(tick >> 0); - *response++ = (uint8_t)(tick >> 8); - *response++ = (uint8_t)(tick >> 16); - *response++ = (uint8_t)(tick >> 24); - num += 4U; - } -#endif - - return ((1U << 16) | num); -} - - -// Process SWO Data command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t SWO_Data (const uint8_t *request, uint8_t *response) { - uint8_t status; - uint32_t count; - uint32_t index; - uint32_t n, i; - - status = GetTraceStatus(); - count = GetTraceCount(); - - if (TraceTransport == 1U) { - n = (uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8); - if (n > (DAP_PACKET_SIZE - 4U)) { - n = DAP_PACKET_SIZE - 4U; - } - if (count > n) { - count = n; - } - } else { - count = 0U; - } - - *response++ = status; - *response++ = (uint8_t)(count >> 0); - *response++ = (uint8_t)(count >> 8); - - if (TraceTransport == 1U) { - index = TraceIndexO; - for (i = index, n = count; n; n--) { - i &= SWO_BUFFER_SIZE - 1U; - *response++ = TraceBuf[i++]; - } - TraceIndexO = index + count; - ResumeTrace(); - } - - return ((2U << 16) | (3U + count)); -} - - -#if (SWO_STREAM != 0) - -// SWO Data Transfer complete callback -void SWO_TransferComplete (void) { - TraceIndexO += TransferSize; - TransferBusy = 0U; - ResumeTrace(); - osThreadFlagsSet(SWO_ThreadId, 1U); -} - -// SWO Thread -__NO_RETURN void SWO_Thread (void *argument) { - uint32_t timeout; - uint32_t flags; - uint32_t count; - uint32_t index; - uint32_t i, n; - (void) argument; - - timeout = osWaitForever; - - for (;;) { - flags = osThreadFlagsWait(1U, osFlagsWaitAny, timeout); - if (TraceStatus & DAP_SWO_CAPTURE_ACTIVE) { - timeout = SWO_STREAM_TIMEOUT; - } else { - timeout = osWaitForever; - flags = osFlagsErrorTimeout; - } - if (TransferBusy == 0U) { - count = GetTraceCount(); - if (count != 0U) { - index = TraceIndexO & (SWO_BUFFER_SIZE - 1U); - n = SWO_BUFFER_SIZE - index; - if (count > n) { - count = n; - } - if (flags != osFlagsErrorTimeout) { - i = index & (USB_BLOCK_SIZE - 1U); - if (i == 0U) { - count &= ~(USB_BLOCK_SIZE - 1U); - } else { - n = USB_BLOCK_SIZE - i; - if (count >= n) { - count = n; - } else { - count = 0U; - } - } - } - if (count != 0U) { - TransferSize = count; - TransferBusy = 1U; - SWO_QueueTransfer(&TraceBuf[index], count); - } - } - } - } -} - -#endif /* (SWO_STREAM != 0) */ - - -#endif /* ((SWO_UART != 0) || (SWO_MANCHESTER != 0)) */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/SW_DP.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/SW_DP.c deleted file mode 100644 index 803cf42..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/SW_DP.c +++ /dev/null @@ -1,286 +0,0 @@ -/* - * Copyright (c) 2013-2017 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 1. December 2017 - * $Revision: V2.0.0 - * - * Project: CMSIS-DAP Source - * Title: SW_DP.c CMSIS-DAP SW DP I/O - * - *---------------------------------------------------------------------------*/ - -#include "DAP_config.h" -#include "DAP.h" - - -// SW Macros - -#define PIN_SWCLK_SET PIN_SWCLK_TCK_SET -#define PIN_SWCLK_CLR PIN_SWCLK_TCK_CLR - -#define SW_CLOCK_CYCLE() \ - PIN_SWCLK_CLR(); \ - PIN_DELAY(); \ - PIN_SWCLK_SET(); \ - PIN_DELAY() - -#define SW_WRITE_BIT(bit) \ - PIN_SWDIO_OUT(bit); \ - PIN_SWCLK_CLR(); \ - PIN_DELAY(); \ - PIN_SWCLK_SET(); \ - PIN_DELAY() - -#define SW_READ_BIT(bit) \ - PIN_SWCLK_CLR(); \ - PIN_DELAY(); \ - bit = PIN_SWDIO_IN(); \ - PIN_SWCLK_SET(); \ - PIN_DELAY() - -#define PIN_DELAY() PIN_DELAY_SLOW(DAP_Data.clock_delay) - - -// Generate SWJ Sequence -// count: sequence bit count -// data: pointer to sequence bit data -// return: none -#if ((DAP_SWD != 0) || (DAP_JTAG != 0)) -void SWJ_Sequence (uint32_t count, const uint8_t *data) { - uint32_t val; - uint32_t n; - - val = 0U; - n = 0U; - while (count--) { - if (n == 0U) { - val = *data++; - n = 8U; - } - if (val & 1U) { - PIN_SWDIO_TMS_SET(); - } else { - PIN_SWDIO_TMS_CLR(); - } - SW_CLOCK_CYCLE(); - val >>= 1; - n--; - } -} -#endif - - -// Generate SWD Sequence -// info: sequence information -// swdo: pointer to SWDIO generated data -// swdi: pointer to SWDIO captured data -// return: none -#if (DAP_SWD != 0) -void SWD_Sequence (uint32_t info, const uint8_t *swdo, uint8_t *swdi) { - uint32_t val; - uint32_t bit; - uint32_t n, k; - - n = info & SWD_SEQUENCE_CLK; - if (n == 0U) { - n = 64U; - } - - if (info & SWD_SEQUENCE_DIN) { - while (n) { - val = 0U; - for (k = 8U; k && n; k--, n--) { - SW_READ_BIT(bit); - val >>= 1; - val |= bit << 7; - } - val >>= k; - *swdi++ = (uint8_t)val; - } - } else { - while (n) { - val = *swdo++; - for (k = 8U; k && n; k--, n--) { - SW_WRITE_BIT(val); - val >>= 1; - } - } - } -} -#endif - - -#if (DAP_SWD != 0) - - -// SWD Transfer I/O -// request: A[3:2] RnW APnDP -// data: DATA[31:0] -// return: ACK[2:0] -#define SWD_TransferFunction(speed) /**/ \ -static uint8_t SWD_Transfer##speed (uint32_t request, uint32_t *data) { \ - uint32_t ack; \ - uint32_t bit; \ - uint32_t val; \ - uint32_t parity; \ - \ - uint32_t n; \ - \ - /* Packet Request */ \ - parity = 0U; \ - SW_WRITE_BIT(1U); /* Start Bit */ \ - bit = request >> 0; \ - SW_WRITE_BIT(bit); /* APnDP Bit */ \ - parity += bit; \ - bit = request >> 1; \ - SW_WRITE_BIT(bit); /* RnW Bit */ \ - parity += bit; \ - bit = request >> 2; \ - SW_WRITE_BIT(bit); /* A2 Bit */ \ - parity += bit; \ - bit = request >> 3; \ - SW_WRITE_BIT(bit); /* A3 Bit */ \ - parity += bit; \ - SW_WRITE_BIT(parity); /* Parity Bit */ \ - SW_WRITE_BIT(0U); /* Stop Bit */ \ - SW_WRITE_BIT(1U); /* Park Bit */ \ - \ - /* Turnaround */ \ - PIN_SWDIO_OUT_DISABLE(); \ - for (n = DAP_Data.swd_conf.turnaround; n; n--) { \ - SW_CLOCK_CYCLE(); \ - } \ - \ - /* Acknowledge response */ \ - SW_READ_BIT(bit); \ - ack = bit << 0; \ - SW_READ_BIT(bit); \ - ack |= bit << 1; \ - SW_READ_BIT(bit); \ - ack |= bit << 2; \ - \ - if (ack == DAP_TRANSFER_OK) { /* OK response */ \ - /* Data transfer */ \ - if (request & DAP_TRANSFER_RnW) { \ - /* Read data */ \ - val = 0U; \ - parity = 0U; \ - for (n = 32U; n; n--) { \ - SW_READ_BIT(bit); /* Read RDATA[0:31] */ \ - parity += bit; \ - val >>= 1; \ - val |= bit << 31; \ - } \ - SW_READ_BIT(bit); /* Read Parity */ \ - if ((parity ^ bit) & 1U) { \ - ack = DAP_TRANSFER_ERROR; \ - } \ - if (data) { *data = val; } \ - /* Turnaround */ \ - for (n = DAP_Data.swd_conf.turnaround; n; n--) { \ - SW_CLOCK_CYCLE(); \ - } \ - PIN_SWDIO_OUT_ENABLE(); \ - } else { \ - /* Turnaround */ \ - for (n = DAP_Data.swd_conf.turnaround; n; n--) { \ - SW_CLOCK_CYCLE(); \ - } \ - PIN_SWDIO_OUT_ENABLE(); \ - /* Write data */ \ - val = *data; \ - parity = 0U; \ - for (n = 32U; n; n--) { \ - SW_WRITE_BIT(val); /* Write WDATA[0:31] */ \ - parity += val; \ - val >>= 1; \ - } \ - SW_WRITE_BIT(parity); /* Write Parity Bit */ \ - } \ - /* Capture Timestamp */ \ - if (request & DAP_TRANSFER_TIMESTAMP) { \ - DAP_Data.timestamp = TIMESTAMP_GET(); \ - } \ - /* Idle cycles */ \ - n = DAP_Data.transfer.idle_cycles; \ - if (n) { \ - PIN_SWDIO_OUT(0U); \ - for (; n; n--) { \ - SW_CLOCK_CYCLE(); \ - } \ - } \ - PIN_SWDIO_OUT(1U); \ - return ((uint8_t)ack); \ - } \ - \ - if ((ack == DAP_TRANSFER_WAIT) || (ack == DAP_TRANSFER_FAULT)) { \ - /* WAIT or FAULT response */ \ - if (DAP_Data.swd_conf.data_phase && ((request & DAP_TRANSFER_RnW) != 0U)) { \ - for (n = 32U+1U; n; n--) { \ - SW_CLOCK_CYCLE(); /* Dummy Read RDATA[0:31] + Parity */ \ - } \ - } \ - /* Turnaround */ \ - for (n = DAP_Data.swd_conf.turnaround; n; n--) { \ - SW_CLOCK_CYCLE(); \ - } \ - PIN_SWDIO_OUT_ENABLE(); \ - if (DAP_Data.swd_conf.data_phase && ((request & DAP_TRANSFER_RnW) == 0U)) { \ - PIN_SWDIO_OUT(0U); \ - for (n = 32U+1U; n; n--) { \ - SW_CLOCK_CYCLE(); /* Dummy Write WDATA[0:31] + Parity */ \ - } \ - } \ - PIN_SWDIO_OUT(1U); \ - return ((uint8_t)ack); \ - } \ - \ - /* Protocol error */ \ - for (n = DAP_Data.swd_conf.turnaround + 32U + 1U; n; n--) { \ - SW_CLOCK_CYCLE(); /* Back off data phase */ \ - } \ - PIN_SWDIO_OUT_ENABLE(); \ - PIN_SWDIO_OUT(1U); \ - return ((uint8_t)ack); \ -} - - -#undef PIN_DELAY -#define PIN_DELAY() PIN_DELAY_FAST() -SWD_TransferFunction(Fast) - -#undef PIN_DELAY -#define PIN_DELAY() PIN_DELAY_SLOW(DAP_Data.clock_delay) -SWD_TransferFunction(Slow) - - -// SWD Transfer I/O -// request: A[3:2] RnW APnDP -// data: DATA[31:0] -// return: ACK[2:0] -uint8_t SWD_Transfer(uint32_t request, uint32_t *data) { - if (DAP_Data.fast_clock) { - return SWD_TransferFast(request, data); - } else { - return SWD_TransferSlow(request, data); - } -} - - -#endif /* (DAP_SWD != 0) */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/UART.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/UART.c deleted file mode 100644 index 8e9eae5..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DAP/Firmware/Source/UART.c +++ /dev/null @@ -1,652 +0,0 @@ -/* - * Copyright (c) 2021 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 1. March 2021 - * $Revision: V1.0.0 - * - * Project: CMSIS-DAP Source - * Title: UART.c CMSIS-DAP UART - * - *---------------------------------------------------------------------------*/ - -#include "DAP_config.h" -#include "DAP.h" - -#if (DAP_UART != 0) - -#ifdef DAP_FW_V1 -#error "UART Communication Port not supported in DAP V1!" -#endif - -#include "Driver_USART.h" - -#include "cmsis_os2.h" -#include - -#define UART_RX_BLOCK_SIZE 32U /* Uart Rx Block Size (must be 2^n) */ - -// USART Driver -#define _USART_Driver_(n) Driver_USART##n -#define USART_Driver_(n) _USART_Driver_(n) -extern ARM_DRIVER_USART USART_Driver_(DAP_UART_DRIVER); -#define pUSART (&USART_Driver_(DAP_UART_DRIVER)) - -// UART Configuration -#if (DAP_UART_USB_COM_PORT != 0) -static uint8_t UartTransport = DAP_UART_TRANSPORT_USB_COM_PORT; -#else -static uint8_t UartTransport = DAP_UART_TRANSPORT_NONE; -#endif - -// UART Flags -static uint8_t UartConfigured = 0U; -static uint8_t UartReceiveEnabled = 0U; -static uint8_t UartTransmitEnabled = 0U; -static uint8_t UartTransmitActive = 0U; - -// UART TX Buffer -static uint8_t UartTxBuf[DAP_UART_TX_BUFFER_SIZE]; -static volatile uint32_t UartTxIndexI = 0U; -static volatile uint32_t UartTxIndexO = 0U; - -// UART RX Buffer -static uint8_t UartRxBuf[DAP_UART_RX_BUFFER_SIZE]; -static volatile uint32_t UartRxIndexI = 0U; -static volatile uint32_t UartRxIndexO = 0U; - -// Uart Errors -static volatile uint8_t UartErrorRxDataLost = 0U; -static volatile uint8_t UartErrorFraming = 0U; -static volatile uint8_t UartErrorParity = 0U; - -// UART Transmit -static uint32_t UartTxNum = 0U; - -// Function prototypes -static uint8_t UART_Init (void); -static void UART_Uninit (void); -static uint8_t UART_Get_Status (void); -static uint8_t UART_Receive_Enable (void); -static uint8_t UART_Transmit_Enable (void); -static void UART_Receive_Disable (void); -static void UART_Transmit_Disable (void); -static void UART_Receive_Flush (void); -static void UART_Transmit_Flush (void); -static void UART_Receive (void); -static void UART_Transmit (void); - - -// USART Driver Callback function -// event: event mask -static void USART_Callback (uint32_t event) { - if (event & ARM_USART_EVENT_SEND_COMPLETE) { - UartTxIndexO += UartTxNum; - UartTransmitActive = 0U; - UART_Transmit(); - } - if (event & ARM_USART_EVENT_RECEIVE_COMPLETE) { - UartRxIndexI += UART_RX_BLOCK_SIZE; - UART_Receive(); - } - if (event & ARM_USART_EVENT_RX_OVERFLOW) { - UartErrorRxDataLost = 1U; - } - if (event & ARM_USART_EVENT_RX_FRAMING_ERROR) { - UartErrorFraming = 1U; - } - if (event & ARM_USART_EVENT_RX_PARITY_ERROR) { - UartErrorParity = 1U; - } -} - -// Init UART -// return: DAP_OK or DAP_ERROR -static uint8_t UART_Init (void) { - int32_t status; - uint8_t ret = DAP_ERROR; - - UartConfigured = 0U; - UartReceiveEnabled = 0U; - UartTransmitEnabled = 0U; - UartTransmitActive = 0U; - UartErrorRxDataLost = 0U; - UartErrorFraming = 0U; - UartErrorParity = 0U; - UartTxIndexI = 0U; - UartTxIndexO = 0U; - UartRxIndexI = 0U; - UartRxIndexO = 0U; - UartTxNum = 0U; - - status = pUSART->Initialize(USART_Callback); - if (status == ARM_DRIVER_OK) { - status = pUSART->PowerControl(ARM_POWER_FULL); - } - if (status == ARM_DRIVER_OK) { - ret = DAP_OK; - } - - return (ret); -} - -// Un-Init UART -static void UART_Uninit (void) { - UartConfigured = 0U; - - pUSART->PowerControl(ARM_POWER_OFF); - pUSART->Uninitialize(); -} - -// Get UART Status -// return: status -static uint8_t UART_Get_Status (void) { - uint8_t status = 0U; - - if (UartReceiveEnabled != 0U) { - status |= DAP_UART_STATUS_RX_ENABLED; - } - if (UartErrorRxDataLost != 0U) { - UartErrorRxDataLost = 0U; - status |= DAP_UART_STATUS_RX_DATA_LOST; - } - if (UartErrorFraming != 0U) { - UartErrorFraming = 0U; - status |= DAP_UART_STATUS_FRAMING_ERROR; - } - if (UartErrorParity != 0U) { - UartErrorParity = 0U; - status |= DAP_UART_STATUS_PARITY_ERROR; - } - if (UartTransmitEnabled != 0U) { - status |= DAP_UART_STATUS_TX_ENABLED; - } - - return (status); -} - -// Enable UART Receive -// return: DAP_OK or DAP_ERROR -static uint8_t UART_Receive_Enable (void) { - int32_t status; - uint8_t ret = DAP_ERROR; - - if (UartReceiveEnabled == 0U) { - // Flush Buffers - UartRxIndexI = 0U; - UartRxIndexO = 0U; - - UART_Receive(); - status = pUSART->Control(ARM_USART_CONTROL_RX, 1U); - if (status == ARM_DRIVER_OK) { - UartReceiveEnabled = 1U; - ret = DAP_OK; - } - } else { - ret = DAP_OK; - } - - return (ret); -} - -// Enable UART Transmit -// return: DAP_OK or DAP_ERROR -static uint8_t UART_Transmit_Enable (void) { - int32_t status; - uint8_t ret = DAP_ERROR; - - if (UartTransmitEnabled == 0U) { - // Flush Buffers - UartTransmitActive = 0U; - UartTxIndexI = 0U; - UartTxIndexO = 0U; - UartTxNum = 0U; - - status = pUSART->Control(ARM_USART_CONTROL_TX, 1U); - if (status == ARM_DRIVER_OK) { - UartTransmitEnabled = 1U; - ret = DAP_OK; - } - } else { - ret = DAP_OK; - } - - return (ret); -} - -// Disable UART Receive -static void UART_Receive_Disable (void) { - if (UartReceiveEnabled != 0U) { - pUSART->Control(ARM_USART_CONTROL_RX, 0U); - pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U); - UartReceiveEnabled = 0U; - } -} - -// Disable UART Transmit -static void UART_Transmit_Disable (void) { - if (UartTransmitEnabled != 0U) { - pUSART->Control(ARM_USART_ABORT_SEND, 0U); - pUSART->Control(ARM_USART_CONTROL_TX, 0U); - UartTransmitActive = 0U; - UartTransmitEnabled = 0U; - } -} - -// Flush UART Receive buffer -static void UART_Receive_Flush (void) { - pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U); - UartRxIndexI = 0U; - UartRxIndexO = 0U; - if (UartReceiveEnabled != 0U) { - UART_Receive(); - } -} - -// Flush UART Transmit buffer -static void UART_Transmit_Flush (void) { - pUSART->Control(ARM_USART_ABORT_SEND, 0U); - UartTransmitActive = 0U; - UartTxIndexI = 0U; - UartTxIndexO = 0U; - UartTxNum = 0U; -} - -// Receive data from target via UART -static void UART_Receive (void) { - uint32_t index; - - index = UartRxIndexI & (DAP_UART_RX_BUFFER_SIZE - 1U); - pUSART->Receive(&UartRxBuf[index], UART_RX_BLOCK_SIZE); -} - -// Transmit available data to target via UART -static void UART_Transmit (void) { - uint32_t count; - uint32_t index; - - count = UartTxIndexI - UartTxIndexO; - index = UartTxIndexO & (DAP_UART_TX_BUFFER_SIZE - 1U); - - if (count != 0U) { - if ((index + count) <= DAP_UART_TX_BUFFER_SIZE) { - UartTxNum = count; - } else { - UartTxNum = DAP_UART_TX_BUFFER_SIZE - index; - } - UartTransmitActive = 1U; - pUSART->Send(&UartTxBuf[index], UartTxNum); - } -} - -// Process UART Transport command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t UART_Transport (const uint8_t *request, uint8_t *response) { - uint8_t transport; - uint8_t ret = DAP_ERROR; - - transport = *request; - switch (transport) { - case DAP_UART_TRANSPORT_NONE: - switch (UartTransport) { - case DAP_UART_TRANSPORT_NONE: - ret = DAP_OK; - break; - case DAP_UART_TRANSPORT_USB_COM_PORT: -#if (DAP_UART_USB_COM_PORT != 0) - USB_COM_PORT_Activate(0U); - UartTransport = DAP_UART_TRANSPORT_NONE; - ret = DAP_OK; -#endif - break; - case DAP_UART_TRANSPORT_DAP_COMMAND: - UART_Receive_Disable(); - UART_Transmit_Disable(); - UART_Uninit(); - UartTransport = DAP_UART_TRANSPORT_NONE; - ret= DAP_OK; - break; - } - break; - case DAP_UART_TRANSPORT_USB_COM_PORT: - switch (UartTransport) { - case DAP_UART_TRANSPORT_NONE: -#if (DAP_UART_USB_COM_PORT != 0) - if (USB_COM_PORT_Activate(1U) == 0U) { - UartTransport = DAP_UART_TRANSPORT_USB_COM_PORT; - ret = DAP_OK; - } -#endif - break; - case DAP_UART_TRANSPORT_USB_COM_PORT: - ret = DAP_OK; - break; - case DAP_UART_TRANSPORT_DAP_COMMAND: - UART_Receive_Disable(); - UART_Transmit_Disable(); - UART_Uninit(); - UartTransport = DAP_UART_TRANSPORT_NONE; -#if (DAP_UART_USB_COM_PORT != 0) - if (USB_COM_PORT_Activate(1U) == 0U) { - UartTransport = DAP_UART_TRANSPORT_USB_COM_PORT; - ret = DAP_OK; - } -#endif - break; - } - break; - case DAP_UART_TRANSPORT_DAP_COMMAND: - switch (UartTransport) { - case DAP_UART_TRANSPORT_NONE: - ret = UART_Init(); - if (ret == DAP_OK) { - UartTransport = DAP_UART_TRANSPORT_DAP_COMMAND; - } - break; - case DAP_UART_TRANSPORT_USB_COM_PORT: -#if (DAP_UART_USB_COM_PORT != 0) - USB_COM_PORT_Activate(0U); - UartTransport = DAP_UART_TRANSPORT_NONE; -#endif - ret = UART_Init(); - if (ret == DAP_OK) { - UartTransport = DAP_UART_TRANSPORT_DAP_COMMAND; - } - break; - case DAP_UART_TRANSPORT_DAP_COMMAND: - ret = DAP_OK; - break; - } - break; - default: - break; - } - - *response = ret; - - return ((1U << 16) | 1U); -} - -// Process UART Configure command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t UART_Configure (const uint8_t *request, uint8_t *response) { - uint8_t control, status; - uint32_t baudrate; - int32_t result; - - if (UartTransport != DAP_UART_TRANSPORT_DAP_COMMAND) { - status = DAP_UART_CFG_ERROR_DATA_BITS | - DAP_UART_CFG_ERROR_PARITY | - DAP_UART_CFG_ERROR_STOP_BITS; - baudrate = 0U; // baudrate error - } else { - - status = 0U; - control = *request; - baudrate = (uint32_t)(*(request+1) << 0) | - (uint32_t)(*(request+2) << 8) | - (uint32_t)(*(request+3) << 16) | - (uint32_t)(*(request+4) << 24); - - result = pUSART->Control(control | - ARM_USART_MODE_ASYNCHRONOUS | - ARM_USART_FLOW_CONTROL_NONE, - baudrate); - if (result == ARM_DRIVER_OK) { - UartConfigured = 1U; - } else { - UartConfigured = 0U; - switch (result) { - case ARM_USART_ERROR_BAUDRATE: - status = 0U; - baudrate = 0U; - break; - case ARM_USART_ERROR_DATA_BITS: - status = DAP_UART_CFG_ERROR_DATA_BITS; - break; - case ARM_USART_ERROR_PARITY: - status = DAP_UART_CFG_ERROR_PARITY; - break; - case ARM_USART_ERROR_STOP_BITS: - status = DAP_UART_CFG_ERROR_STOP_BITS; - break; - default: - status = DAP_UART_CFG_ERROR_DATA_BITS | - DAP_UART_CFG_ERROR_PARITY | - DAP_UART_CFG_ERROR_STOP_BITS; - baudrate = 0U; - break; - } - } - } - - *response++ = status; - *response++ = (uint8_t)(baudrate >> 0); - *response++ = (uint8_t)(baudrate >> 8); - *response++ = (uint8_t)(baudrate >> 16); - *response = (uint8_t)(baudrate >> 24); - - return ((5U << 16) | 5U); -} - -// Process UART Control command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t UART_Control (const uint8_t *request, uint8_t *response) { - uint8_t control; - uint8_t result; - uint8_t ret = DAP_OK; - - if (UartTransport != DAP_UART_TRANSPORT_DAP_COMMAND) { - ret = DAP_ERROR; - } else { - - control = *request; - - if ((control & DAP_UART_CONTROL_RX_DISABLE) != 0U) { - // Receive disable - UART_Receive_Disable(); - } else if ((control & DAP_UART_CONTROL_RX_ENABLE) != 0U) { - // Receive enable - if (UartConfigured != 0U) { - result = UART_Receive_Enable(); - if (result != DAP_OK) { - ret = DAP_ERROR; - } - } else { - ret = DAP_ERROR; - } - } - if ((control & DAP_UART_CONTROL_RX_BUF_FLUSH) != 0U) { - UART_Receive_Flush(); - } - - if ((control & DAP_UART_CONTROL_TX_DISABLE) != 0U) { - // Transmit disable - UART_Transmit_Disable(); - } else if ((control & DAP_UART_CONTROL_TX_ENABLE) != 0U) { - // Transmit enable - if (UartConfigured != 0U) { - result = UART_Transmit_Enable(); - if (result != DAP_OK) { - ret = DAP_ERROR; - } - } else { - ret = DAP_ERROR; - } - } - if ((control & DAP_UART_CONTROL_TX_BUF_FLUSH) != 0U) { - UART_Transmit_Flush(); - } - } - - *response = ret; - - return ((1U << 16) | 1U); -} - -// Process UART Status command and prepare response -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t UART_Status (uint8_t *response) { - uint32_t rx_cnt, tx_cnt; - uint32_t cnt; - uint8_t status; - - if ((UartTransport != DAP_UART_TRANSPORT_DAP_COMMAND) || - (UartConfigured == 0U)) { - rx_cnt = 0U; - tx_cnt = 0U; - status = 0U; - } else { - - rx_cnt = UartRxIndexI - UartRxIndexO; - rx_cnt += pUSART->GetRxCount(); - if (rx_cnt > (DAP_UART_RX_BUFFER_SIZE - (UART_RX_BLOCK_SIZE*2))) { - // Overflow - UartErrorRxDataLost = 1U; - rx_cnt = (DAP_UART_RX_BUFFER_SIZE - (UART_RX_BLOCK_SIZE*2)); - UartRxIndexO = UartRxIndexI - rx_cnt; - } - - tx_cnt = UartTxIndexI - UartTxIndexO; - cnt = pUSART->GetTxCount(); - if (UartTransmitActive != 0U) { - tx_cnt -= cnt; - } - - status = UART_Get_Status(); - } - - *response++ = status; - *response++ = (uint8_t)(rx_cnt >> 0); - *response++ = (uint8_t)(rx_cnt >> 8); - *response++ = (uint8_t)(rx_cnt >> 16); - *response++ = (uint8_t)(rx_cnt >> 24); - *response++ = (uint8_t)(tx_cnt >> 0); - *response++ = (uint8_t)(tx_cnt >> 8); - *response++ = (uint8_t)(tx_cnt >> 16); - *response = (uint8_t)(tx_cnt >> 24); - - return ((0U << 16) | 9U); -} - -// Process UART Transfer command and prepare response -// request: pointer to request data -// response: pointer to response data -// return: number of bytes in response (lower 16 bits) -// number of bytes in request (upper 16 bits) -uint32_t UART_Transfer (const uint8_t *request, uint8_t *response) { - uint32_t rx_cnt, tx_cnt; - uint32_t rx_num, tx_num; - uint8_t *rx_data; - const - uint8_t *tx_data; - uint32_t num; - uint32_t index; - uint8_t status; - - if (UartTransport != DAP_UART_TRANSPORT_DAP_COMMAND) { - status = 0U; - rx_cnt = 0U; - tx_cnt = 0U; - } else { - - // RX Data - rx_cnt = ((uint32_t)(*(request+0) << 0) | - (uint32_t)(*(request+1) << 8)); - - if (rx_cnt > (DAP_PACKET_SIZE - 6U)) { - rx_cnt = (DAP_PACKET_SIZE - 6U); - } - rx_num = UartRxIndexI - UartRxIndexO; - rx_num += pUSART->GetRxCount(); - if (rx_num > (DAP_UART_RX_BUFFER_SIZE - (UART_RX_BLOCK_SIZE*2))) { - // Overflow - UartErrorRxDataLost = 1U; - rx_num = (DAP_UART_RX_BUFFER_SIZE - (UART_RX_BLOCK_SIZE*2)); - UartRxIndexO = UartRxIndexI - rx_num; - } - if (rx_cnt > rx_num) { - rx_cnt = rx_num; - } - - rx_data = (response+5); - index = UartRxIndexO & (DAP_UART_RX_BUFFER_SIZE - 1U); - if ((index + rx_cnt) <= DAP_UART_RX_BUFFER_SIZE) { - memcpy( rx_data, &UartRxBuf[index], rx_cnt); - } else { - num = DAP_UART_RX_BUFFER_SIZE - index; - memcpy( rx_data, &UartRxBuf[index], num); - memcpy(&rx_data[num], &UartRxBuf[0], rx_cnt - num); - } - UartRxIndexO += rx_cnt; - - // TX Data - tx_cnt = ((uint32_t)(*(request+2) << 0) | - (uint32_t)(*(request+3) << 8)); - tx_data = (request+4); - - if (tx_cnt > (DAP_PACKET_SIZE - 5U)) { - tx_cnt = (DAP_PACKET_SIZE - 5U); - } - tx_num = UartTxIndexI - UartTxIndexO; - num = pUSART->GetTxCount(); - if (UartTransmitActive != 0U) { - tx_num -= num; - } - if (tx_cnt > (DAP_UART_TX_BUFFER_SIZE - tx_num)) { - tx_cnt = (DAP_UART_TX_BUFFER_SIZE - tx_num); - } - - index = UartTxIndexI & (DAP_UART_TX_BUFFER_SIZE - 1U); - if ((index + tx_cnt) <= DAP_UART_TX_BUFFER_SIZE) { - memcpy(&UartTxBuf[index], tx_data, tx_cnt); - } else { - num = DAP_UART_TX_BUFFER_SIZE - index; - memcpy(&UartTxBuf[index], tx_data, num); - memcpy(&UartTxBuf[0], &tx_data[num], tx_cnt - num); - } - UartTxIndexI += tx_cnt; - - if (UartTransmitActive == 0U) { - UART_Transmit(); - } - - status = UART_Get_Status(); - } - - *response++ = status; - *response++ = (uint8_t)(tx_cnt >> 0); - *response++ = (uint8_t)(tx_cnt >> 8); - *response++ = (uint8_t)(rx_cnt >> 0); - *response = (uint8_t)(rx_cnt >> 8); - - return (((4U + tx_cnt) << 16) | (5U + rx_cnt)); -} - -#endif /* DAP_UART */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BasicMathFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BasicMathFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BasicMathFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BasicMathFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BayesFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BayesFunctions/BayesFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BayesFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BayesFunctions/BayesFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BayesFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BayesFunctions/BayesFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BayesFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/BayesFunctions/BayesFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/CommonTables.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/CommonTables/CommonTables.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/CommonTables.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/CommonTables/CommonTables.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/CommonTablesF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/CommonTables/CommonTablesF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/CommonTablesF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/CommonTables/CommonTablesF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ComplexMathFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/ComplexMathFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ComplexMathFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/ComplexMathFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ComplexMathFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/ComplexMathFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ComplexMathFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ComplexMathFunctions/ComplexMathFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ControllerFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ControllerFunctions/ControllerFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ControllerFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/ControllerFunctions/ControllerFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/DistanceFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/DistanceFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/DistanceFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/DistanceFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FastMathFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FastMathFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FastMathFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FastMathFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FilteringFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FilteringFunctions/FilteringFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FilteringFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FilteringFunctions/FilteringFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FilteringFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FilteringFunctions/FilteringFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FilteringFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/FilteringFunctions/FilteringFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/InterpolationFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/InterpolationFunctions/InterpolationFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/InterpolationFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/InterpolationFunctions/InterpolationFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/InterpolationFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/InterpolationFunctions/InterpolationFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/InterpolationFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/InterpolationFunctions/InterpolationFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/MatrixFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/MatrixFunctions/MatrixFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/MatrixFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/MatrixFunctions/MatrixFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/MatrixFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/MatrixFunctions/MatrixFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/MatrixFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/MatrixFunctions/MatrixFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/QuaternionMathFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/QuaternionMathFunctions/QuaternionMathFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/QuaternionMathFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/QuaternionMathFunctions/QuaternionMathFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SVMFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SVMFunctions/SVMFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SVMFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SVMFunctions/SVMFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SVMFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SVMFunctions/SVMFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SVMFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SVMFunctions/SVMFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/StatisticsFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/StatisticsFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/StatisticsFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/StatisticsFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SupportFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SupportFunctions/SupportFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SupportFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SupportFunctions/SupportFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SupportFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SupportFunctions/SupportFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SupportFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/SupportFunctions/SupportFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/TransformFunctions.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctions.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/TransformFunctions.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctions.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/TransformFunctionsF16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctionsF16.c similarity index 100% rename from MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/TransformFunctionsF16.c rename to MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctionsF16.c diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/CMakeLists.txt deleted file mode 100644 index ad3b748..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (c) 2019-2021 Arm Limited. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -cmake_minimum_required(VERSION 3.15.6) - -project(CMSISNN) - -set(CMSIS_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..") - -option(BUILD_CMSIS_NN_FUNCTIONS "Build CMSIS-NN Source." ON) - -if(BUILD_CMSIS_NN_FUNCTIONS) - add_subdirectory(Source) -endif() diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_math_types.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_math_types.h deleted file mode 100644 index 390fe78..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_math_types.h +++ /dev/null @@ -1,169 +0,0 @@ -/****************************************************************************** - * @file arm_nn_math_types.h - * @brief Compiler include and basic types - * @version V1.1.0 - * @date 09 March 2022 - * Target Processor: Cortex-M - ******************************************************************************/ -/* - * Copyright (c) 2010-2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/** - Copied from CMSIS/DSP/arm_math_types.h and modified -*/ - -#ifndef _ARM_NN_MATH_TYPES_H_ - -#define _ARM_NN_MATH_TYPES_H_ - -/* DSP inlcude for enum arm_status. */ -#include "arm_math_types.h" - -#ifdef __cplusplus -extern "C" { -#endif - -/* Compiler specific diagnostic adjustment */ -#if defined(__CC_ARM) - -#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) - -#elif defined(__GNUC__) - -#elif defined(__ICCARM__) - -#elif defined(__TI_ARM__) - -#elif defined(__CSMC__) - -#elif defined(__TASKING__) - -#elif defined(_MSC_VER) - -#else -#error Unknown compiler -#endif - -/* Included for instrinsics definitions */ -#if defined(_MSC_VER) -#include -#ifndef __STATIC_FORCEINLINE -#define __STATIC_FORCEINLINE static __forceinline -#endif -#ifndef __STATIC_INLINE -#define __STATIC_INLINE static __inline -#endif -#ifndef __ALIGNED -#define __ALIGNED(x) __declspec(align(x)) -#endif - -#elif defined(__GNUC_PYTHON__) -#include -#ifndef __ALIGNED -#define __ALIGNED(x) __attribute__((aligned(x))) -#endif -#ifndef __STATIC_FORCEINLINE -#define __STATIC_FORCEINLINE static inline __attribute__((always_inline)) -#endif -#ifndef __STATIC_INLINE -#define __STATIC_INLINE static inline -#endif - -#else -#include "cmsis_compiler.h" -#endif - -#include -#include -#include -#include - -/* evaluate ARM DSP feature */ -#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1)) -#ifndef ARM_MATH_DSP -#define ARM_MATH_DSP 1 -#endif -#endif - -#if __ARM_FEATURE_MVE -#ifndef ARM_MATH_MVEI -#define ARM_MATH_MVEI -#endif -#endif - -/* Compiler specific diagnostic adjustment */ -#if defined(__CC_ARM) - -#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) - -#elif defined(__GNUC__) -// #pragma GCC diagnostic pop - -#elif defined(__ICCARM__) - -#elif defined(__TI_ARM__) - -#elif defined(__CSMC__) - -#elif defined(__TASKING__) - -#elif defined(_MSC_VER) - -#else -#error Unknown compiler -#endif - -#ifdef __cplusplus -} -#endif - -#if __ARM_FEATURE_MVE -#include -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @brief Add necessary typedefs - */ - -#define NN_Q31_MAX ((q31_t)(0x7FFFFFFFL)) -#define NN_Q15_MAX ((q15_t)(0x7FFF)) -#define NN_Q7_MAX ((q7_t)(0x7F)) -#define NN_Q31_MIN ((q31_t)(0x80000000L)) -#define NN_Q15_MIN ((q15_t)(0x8000)) -#define NN_Q7_MIN ((q7_t)(0x80)) - -/** - * @brief Error status returned by some functions in the library. - */ - -typedef enum -{ - ARM_CMSIS_NN_SUCCESS = 0, /**< No error */ - ARM_CMSIS_NN_ARG_ERROR = -1, /**< One or more arguments are incorrect */ - ARM_CMSIS_NN_NO_IMPL_ERROR = -2, /**< No implementation available */ -} arm_cmsis_nn_status; - -#ifdef __cplusplus -} -#endif - -#endif /*ifndef _ARM_NN_MATH_TYPES_H_ */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_tables.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_tables.h deleted file mode 100644 index 327294d..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_tables.h +++ /dev/null @@ -1,56 +0,0 @@ -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_tables.h - * Description: Extern declaration for NN tables - * - * $Date: 17. August 2021 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef _ARM_NN_TABLES_H -#define _ARM_NN_TABLES_H - -#include "arm_nn_math_types.h" - -/** - * @brief tables for various activation functions - * - */ - -extern const q15_t sigmoidTable_q15[256]; -extern const q7_t sigmoidTable_q7[256]; - -extern const q7_t tanhTable_q7[256]; -extern const q15_t tanhTable_q15[256]; - -/** - * @brief 2-way tables for various activation functions - * - * 2-way table, H table for value larger than 1/4 - * L table for value smaller than 1/4, H table for remaining - * We have this only for the q15_t version. It does not make - * sense to have it for q7_t type - */ -extern const q15_t sigmoidHTable_q15[192]; -extern const q15_t sigmoidLTable_q15[128]; - -#endif /* ARM_NN_TABLES_H */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_types.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_types.h deleted file mode 100644 index 6040d72..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nn_types.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_types.h - * Description: Public header file to contain the CMSIS-NN structs for the - * TensorFlowLite micro compliant functions - * - * $Date: 22. Februari 2022 - * $Revision: V.2.1.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#ifndef _ARM_NN_TYPES_H -#define _ARM_NN_TYPES_H - -#include - -/** CMSIS-NN object to contain the width and height of a tile */ -typedef struct -{ - int32_t w; /**< Width */ - int32_t h; /**< Height */ -} cmsis_nn_tile; - -/** CMSIS-NN object used for the function context. */ -typedef struct -{ - void *buf; /**< Pointer to a buffer needed for the optimization */ - int32_t size; /**< Buffer size */ -} cmsis_nn_context; - -/** CMSIS-NN object to contain the dimensions of the tensors */ -typedef struct -{ - int32_t n; /**< Generic dimension to contain either the batch size or output channels. - Please refer to the function documentation for more information */ - int32_t h; /**< Height */ - int32_t w; /**< Width */ - int32_t c; /**< Input channels */ -} cmsis_nn_dims; - -/** CMSIS-NN object for the per-channel quantization parameters */ -typedef struct -{ - int32_t *multiplier; /**< Multiplier values */ - int32_t *shift; /**< Shift values */ -} cmsis_nn_per_channel_quant_params; - -/** CMSIS-NN object for the per-tensor quantization parameters */ -typedef struct -{ - int32_t multiplier; /**< Multiplier value */ - int32_t shift; /**< Shift value */ -} cmsis_nn_per_tensor_quant_params; - -/** CMSIS-NN object for the quantized Relu activation */ -typedef struct -{ - int32_t min; /**< Min value used to clamp the result */ - int32_t max; /**< Max value used to clamp the result */ -} cmsis_nn_activation; - -/** CMSIS-NN object for the convolution layer parameters */ -typedef struct -{ - int32_t input_offset; /**< Zero value for the input tensor */ - int32_t output_offset; /**< Zero value for the output tensor */ - cmsis_nn_tile stride; - cmsis_nn_tile padding; - cmsis_nn_tile dilation; - cmsis_nn_activation activation; -} cmsis_nn_conv_params; - -/** CMSIS-NN object for Depthwise convolution layer parameters */ -typedef struct -{ - int32_t input_offset; /**< Zero value for the input tensor */ - int32_t output_offset; /**< Zero value for the output tensor */ - int32_t ch_mult; /**< Channel Multiplier. ch_mult * in_ch = out_ch */ - cmsis_nn_tile stride; - cmsis_nn_tile padding; - cmsis_nn_tile dilation; - cmsis_nn_activation activation; -} cmsis_nn_dw_conv_params; -/** CMSIS-NN object for pooling layer parameters */ -typedef struct -{ - cmsis_nn_tile stride; - cmsis_nn_tile padding; - cmsis_nn_activation activation; -} cmsis_nn_pool_params; - -/** CMSIS-NN object for Fully Connected layer parameters */ -typedef struct -{ - int32_t input_offset; /**< Zero value for the input tensor */ - int32_t filter_offset; /**< Zero value for the filter tensor. Not used */ - int32_t output_offset; /**< Zero value for the output tensor */ - cmsis_nn_activation activation; -} cmsis_nn_fc_params; - -/** CMSIS-NN object for SVDF layer parameters */ -typedef struct -{ - int32_t rank; - int32_t input_offset; /**< Zero value for the input tensor */ - int32_t output_offset; /**< Zero value for the output tensor */ - cmsis_nn_activation input_activation; - cmsis_nn_activation output_activation; -} cmsis_nn_svdf_params; - -/** CMSIS-NN object for Softmax s16 layer parameters */ -typedef struct -{ - const int16_t *exp_lut; - const int16_t *one_by_one_lut; -} cmsis_nn_softmax_lut_s16; - -#endif // _ARM_NN_TYPES_H diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nnfunctions.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nnfunctions.h deleted file mode 100644 index deaade7..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nnfunctions.h +++ /dev/null @@ -1,2532 +0,0 @@ -/* - * Copyright (C) 2010-2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nnfunctions.h - * Description: Public header file for CMSIS NN Library - * - * $Date: 19 April 2022 - * $Revision: V.9.0.0 - * - * Target Processor: Cortex-M CPUs - * -------------------------------------------------------------------- */ - -/** - \mainpage CMSIS NN Software Library - * - * Introduction - * ------------ - * - * This user manual describes the CMSIS NN software library, - * a collection of efficient neural network kernels developed to maximize the - * performance and minimize the memory footprint of neural networks on Cortex-M processor cores. - * - * The library is divided into a number of functions each covering a specific category: - * - Convolution Functions - * - Activation Functions - * - Fully-connected Layer Functions - * - SVDF Layer Functions - * - Pooling Functions - * - Softmax Functions - * - Basic math Functions - * - * The library has separate functions for operating on different weight and activation data - * types including 8-bit integers (q7_t) and 16-bit integers (q15_t). The descrition of the - * kernels are included in the function description. The implementation details are also - * described in this paper [1]. - * - * Supported Processors - * ------- - * CMSIS-NN targets Cortex-M processors with typically three different implementations for each function. Each - * targets a different group of processors. - * - Processors without SIMD capability (e.g, Cortex-M0) - * - Processors with DSP extention (e.g Cortex-M4) - * - Processors with MVE extension (e.g Cortex-M55) - * The right implementation is picked through feature flags and the user usually does not have to explicit set it. - * - * Function Classification - * -------- - * The functions can be classified into two segments - * - Legacy functions supporting ARM's internal symmetric quantization(8 bits). - * - Functions that support TensorFlow Lite framework with symmetric quantization(8 bits). - * - * The legacy functions can be identified with their suffix of _q7 or _q15 and are no new development is done there. - * The article in [2] describes in detail how to run a network using the legacy functions. - * - * The functions supporting TensorFlow Lite framework is identified by the _s8 suffix and can be invoked from TFL - * micro. The functions are bit exact to TensorFlow Lite. Refer to the TensorFlow's documentation in [3] on how to run - * a TensorFlow Lite model using optimized CMSIS-NN kernels. - * - * Block Diagram - * -------- - * \image html CMSIS-NN-OVERVIEW.PNG - * - * Examples - * -------- - * - * The library ships with a number of examples which demonstrate how to use the library functions. - * - * Pre-processor Macros - * ------------ - * - * Each library project have different pre-processor macros. - * - * - ARM_MATH_DSP: - * - * Define macro ARM_MATH_DSP, If the silicon supports DSP instructions(DSP extension). - * - * - ARM_MATH_MVEI: - * - * Define macro ARM_MATH_MVEI, If the silicon supports M-Profile Vector Extension. - - * - ARM_MATH_AUTOVECTORIZE - * Used in conjucture with ARM_MATH_MVEI to let the compiler auto vectorize for the functions that uses inline - * assembly. It does not affect functions that use C or intrinsics. - * - ARM_MATH_BIG_ENDIAN: - * - * Define macro ARM_MATH_BIG_ENDIAN to build the library for big endian targets. This is supported only for the legacy - * functions i.e, functions targetted at TensorFlow Lite do not support big endianness. By default library builds for - * little endian targets. - * - * - ARM_NN_TRUNCATE: - * - * Define macro ARM_NN_TRUNCATE to use floor instead of round-to-the-nearest-int for the computation. - * - * - * Copyright Notice - * ------------ - * - * Copyright (C) 2010-2019 Arm Limited. All rights reserved. - * - * [1] CMSIS-NN: Efficient Neural Network Kernels for Arm Cortex-M CPUs https://arxiv.org/abs/1801.06601 - * - * [2] Converting a Neural Network for Arm Cortex-M with CMSIS-NN - * - https://developer.arm.com/solutions/machine-learning-on-arm/developer-material/how-to-guides/converting-a-neural-network-for-arm-cortex-m-with-cmsis-nn/single-page - * [3] https://www.tensorflow.org/lite/microcontrollers/library - * - * [4] https://github.com/ARM-software/CMSIS_5/tree/develop/CMSIS/NN#legacy-vs-tfl-micro-compliant-apis - */ - -/** - * @defgroup groupNN Neural Network Functions - * A collection of functions to perform basic operations for neural network layers. Functions with a _s8 suffix support - * TensorFlow Lite framework. - */ - -#ifndef _ARM_NNFUNCTIONS_H -#define _ARM_NNFUNCTIONS_H - -#include "arm_nn_math_types.h" -#include "arm_nn_types.h" - -#define USE_INTRINSIC - -//#define ARM_NN_TRUNCATE /* This config the rounding model to floor or round to the nearest int */ - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @brief Struct for specifying activation function types - * - */ -typedef enum -{ - ARM_SIGMOID = 0, - /**< Sigmoid activation function */ - ARM_TANH = 1, - /**< Tanh activation function */ -} arm_nn_activation_type; - -/** - * @defgroup NNConv Convolution Functions - * - * Collection of convolution, depthwise convolution functions and their variants. - * - * The convolution is implemented in 2 steps: im2col and GEMM - * - * im2col is a process of converting each patch of image data into - * a column. After im2col, the convolution is computed as matrix-matrix - * multiplication. - * - * To reduce the memory footprint, the im2col is performed partially. - * Each iteration, only a few column (i.e., patches) are generated and - * computed with GEMM kernels similar to CMSIS-DSP arm_mat_mult functions. - * - */ - -/** - * @brief s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in - cmsis-nn - * to perform the convolution. - * - * @param[in, out] ctx Function context that contains the additional buffer if required by the function. - arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the - * spatial filter dimensions - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int8 - * - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH if argument constraints fail. or, - * ARM_MATH_SUCCESS on successful completion. - * - */ -arm_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Get the required buffer size for arm_convolve_wrapper_s8 - * - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] input_dims Input (activation) dimensions. Format: [N, H, W, C_IN] - * @param[in] filter_dims Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial - * filter dimensions - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * - * @return The function returns required buffer size(bytes) - * - */ -int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params, - const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims); - -/** - * @brief s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in - cmsis-nn - * to perform the convolution. - * - * @param[in, out] ctx Function context that contains the additional buffer if required by the function. - arm_convolve_wrapper_s8_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * conv_params->input_offset : Not used - * conv_params->output_offset : Not used - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int16 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the - * spatial filter dimensions - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Bias data pointer. Data type: int64 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int16 - * - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH if argument constraints fail. or, - * ARM_MATH_SUCCESS on successful completion. - * - */ -arm_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int64_t *bias_data, - const cmsis_nn_dims *output_dims, - q15_t *output_data); - -/** - * @brief Get the required buffer size for arm_convolve_wrapper_s16 - * - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * conv_params->input_offset : Not used - * conv_params->output_offset : Not used - * @param[in] input_dims Input (activation) dimensions. Format: [N, H, W, C_IN] - * @param[in] filter_dims Filter dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the spatial - * filter dimensions - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * - * @return The function returns required buffer size(bytes) - * - */ -int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params, - const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims); - -/** - * @brief Basic s8 convolution function - * @param[in, out] ctx Function context that contains the additional buffer if required by the function. - arm_convolve_s8_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the - * spatial filter dimensions - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Optional bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int8 - - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * 1. Supported framework: TensorFlow Lite micro - * 2. q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - * 3. Additional memory is required for optimization. Refer to argument 'ctx' for details. - * - */ -arm_status arm_convolve_s8(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Get the required buffer size for s8 convolution function - * - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK - * are the spatial filter dimensions - * @return The function returns required buffer size(bytes) - * - */ -int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); - -/** - * @brief Basic s16 convolution function - * @param[in, out] ctx Function context that contains the additional buffer if required by the function. - arm_convolve_s16_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * conv_params->input_offset : Not used - * conv_params->output_offset : Not used - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int16 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the - * spatial filter dimensions - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Optional bias data pointer. Data type: int64 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int16 - - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * 1. Supported framework: TensorFlow Lite micro - * 2. q7/q15 is used as data type eventhough it is s8/s16 data. It is done so to be consistent with existing APIs. - * 3. Additional memory is required for optimization. Refer to argument 'ctx' for details. - * - */ -arm_status arm_convolve_s16(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int64_t *bias_data, - const cmsis_nn_dims *output_dims, - q15_t *output_data); -/** - * @brief Optimized s16 convolution function - * @param[in, out] ctx Function context that contains the additional buffer if required by the function. - arm_convolve_fast_s16_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * conv_params->input_offset : Not used - * conv_params->output_offset : Not used - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int16 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK are the - * spatial filter dimensions. (filter_dims->w * filter_dims->h * input_dims->c) must not - exceed 512 - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Optional bias data pointer. Data type: int64 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int16 - - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * 1. Supported framework: TensorFlow Lite micro - * 2. q7/q15 is used as data type eventhough it is s8/s16 data. It is done so to be consistent with existing APIs. - * 3. Additional memory is required for optimization. Refer to argument 'ctx' for details. - * 4. Implementation supports kernel volumes (filter width * filter height * input channels) < 512. - * - */ - -arm_status arm_convolve_fast_s16(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int64_t *bias_data, - const cmsis_nn_dims *output_dims, - q15_t *output_data); - -/** - * @brief Get the required buffer size for s16 convolution function - * - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK - * are the spatial filter dimensions - * @return The function returns required buffer size(bytes) - * - */ -int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); - -/** - * @brief Get the required buffer size for fast s16 convolution function - * - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, HK, WK, C_IN] where HK and WK - * are the spatial filter dimensions - * @return The function returns required buffer size(bytes) - * - */ -int32_t arm_convolve_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); - -/** - * @brief Basic Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - * - */ -arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Basic Q7 convolution function (non-square shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - */ -arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Basic Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - * - */ -arm_status arm_convolve_HWC_q15_basic(const q15_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Fast Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ -arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Fast Q7 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ - -arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH if argument constraints fail. or, - * ARM_MATH_SUCCESS on successful completion. - * - * This function implement convolution with 1x1 kernel size (i.e., dim_kernel_x=1 - * and dim_kernel_y=1). It can be used for - * second half of MobileNets after depthwise separable convolution. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ -arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Fast s8 version for 1x1 convolution (non-square shape) - * - * @param[in, out] ctx Function context that contains the additional buffer if required by the function. - arm_convolve_1x1_s8_fast_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, 1, C_IN] - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Optional bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int8 - * - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH if argument constraints fail. or, - * ARM_MATH_SUCCESS on successful completion. - * - * @details - * - Supported framework : TensorFlow Lite Micro - * - The following constrains on the arguments apply - * -# input_dims->c is a multiple of 4 - * -# conv_params->padding.w = conv_params->padding.h = 0 - * -# conv_params->stride.w = conv_params->stride.h = 1 - * - */ -arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Get the required buffer size for arm_convolve_1x1_s8_fast - * - * @param[in] input_dims Input (activation) dimensions - * @return The function returns the required buffer size in bytes - * - */ -int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims); - -/** - * @brief 1xn convolution - * - * @param[in, out] ctx Function context that contains the additional buffer if required by the function. - arm_convolve_1_x_n_s8_get_buffer_size will return the buffer_size if required - * @param[in] conv_params Convolution parameters (e.g. strides, dilations, pads,...). - * Range of conv_params->input_offset : [-127, 128] - * Range of conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the horizontal - * spatial filter dimension - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Optional bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[out] output_data Output data pointer. Data type: int8 - * - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH if argument constraints fail. or, - * ARM_MATH_SUCCESS on successful completion. - * - * @details - * - Supported framework : TensorFlow Lite Micro - * - The following constrains on the arguments apply - * -# input_dims->n equals 1 - * -# ouput_dims->w is a multiple of 4 - * -# Explicit constraints(since it is for 1xN convolution) - * -## input_dims->h equals 1 - * -## output_dims->h equals 1 - * -## filter_dims->h equals 1 - *@todo Remove constraint on output_dims->w to make the function generic. - * - */ -arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Get the required additional buffer size for 1xn convolution - * - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * @param[in] filter_dims Filter tensor dimensions. Format: [C_OUT, 1, WK, C_IN] where WK is the - * horizontal spatial filter dimension - * @return The function returns required buffer size(bytes) - * - */ -int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); - -/** - * @brief Q7 version of convolution for RGB image - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This kernel is written exclusively for convolution with ch_im_in - * equals 3. This applies on the first layer of CNNs which has input - * image with RGB format. - */ - -arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Fast Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - * dim_im_out is a multiple of 2 - */ - -arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Fast Q15 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in is multiple of 2 - * - * ch_im_out is multipe of 2 - * - */ - -arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q15_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q15_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Q7 depthwise separable convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - */ - -arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Q7 depthwise separable convolution function (non-square shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding sizes x - * @param[in] padding_y padding sizes y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some contraints: - * ch_im_in is multiple of 2 - * ch_im_out is multiple of 2 - */ -arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB); - -/** - * @brief Wrapper function to pick the right optimized s8 depthwise convolution function - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if required. - * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) - * dw_conv_params->dilation is not used. - * Range of dw_conv_params->input_offset : [-127, 128] - * Range of dw_conv_params->output_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each - * output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Batch argument N is not used and assumed to be 1. - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in, out] output_data Output data pointer. Data type: int8 - * @return The function returns - * ARM_MATH_SUCCESS - Successful completion. - * - * @details - * - Supported framework: TensorFlow Lite - * - Picks one of the the following functions - * -# arm_depthwise_conv_s8() - * -# arm_depthwise_conv_3x3_s8() - Cortex-M CPUs with DSP extension only - * -# arm_depthwise_conv_s8_opt() - * - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - * - Check details of arm_depthwise_conv_s8_opt() for potential data that can be accessed outside of the - * boundary. - */ -arm_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Get size of additional buffer required by arm_depthwise_conv_wrapper_s8() - * - * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) - * dw_conv_params->dilation is not used. - * Range of dw_conv_params->input_offset : [-127, 128] - * Range of dw_conv_params->input_offset : [-128, 127] - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Batch argument N is not used and assumed to be 1. - * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in] output_dims Output tensor dimensions. Format: [1, H, W, C_OUT] - * @return Size of additional memory required for optimizations in bytes. - * - */ -int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims); - -/** - * @brief Basic s8 depthwise convolution function that doesn't have any constraints on the input dimensions. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * exists if additional memory is. - * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) - * dw_conv_params->dilation is not used. - * Range of dw_conv_params->input_offset : [-127, 128] - * Range of dw_conv_params->input_offset : [-128, 127] - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each - * output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * Batch argument N is not used. - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[in, out] output_data Output data pointer. Data type: int8 - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - Supported framework: TensorFlow Lite - * - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - */ -arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Basic s16 depthwise convolution function that doesn't have any constraints on the input dimensions. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * exists if additional memory is. - * @param[in] dw_conv_params Depthwise convolution parameters (e.g. strides, dilations, pads,...) - * conv_params->input_offset : Not used - * conv_params->output_offset : Not used - * @param[in] quant_params Per-channel quantization info. - * It contains the multiplier and shift values to be applied to each - * output channel - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * Batch argument N is not used. - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * @param[in] bias_data Bias data pointer. Data type: int64 - * @param[in] output_dims Output tensor dimensions. Format: [N, H, W, C_OUT] - * @param[in, out] output_data Output data pointer. Data type: int16 - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - Supported framework: TensorFlow Lite - * - q15 is used as data type eventhough it is s16 data. It is done so to be consistent with existing APIs. - */ -arm_status arm_depthwise_conv_s16(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int64_t *bias_data, - const cmsis_nn_dims *output_dims, - q15_t *output_data); - -/** - * @brief Optimized s8 depthwise convolution function for 3x3 kernel size with some constraints on - * the input arguments(documented below). Refer arm_depthwise_conv_s8() for function - * argument details. - * - * @return The function returns one of the following - * ARM_MATH_SIZE_MISMATCH - Unsupported dimension of tensors - * ARM_MATH_ARGUMENT_ERROR - Unsupported pad size along the x axis - * ARM_MATH_SUCCESS - Successful operation - * - * @details - * - Supported framework : TensorFlow Lite Micro - * - The following constrains on the arguments apply - * -# Number of input channel equals number of output channels - * -# Filter height and width equals 3 - * -# Padding along x is either 0 or 1. - * - */ -arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel. - * Refer arm_depthwise_conv_s8() for function argument details. - * - * @return The function returns one of the following - * ARM_MATH_SIZE_MISMATCH - input channel != output channel or - * ch_mult != 1 - * ARM_MATH_SUCCESS - Successful operation - * - * @note If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read out - * for the following if MVE optimizations(Arm Helium Technology) are used. - * - Output shift - * - Output multiplier - * - Output bias - * - kernel - * @details - * - Supported framework: TensorFlow Lite - * - The following constrains on the arguments apply - * -# Number of input channel equals number of output channels or ch_mult equals 1 - * - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - * - Reccomended when number of channels is 4 or greater. - * - */ -arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Get the required buffer size for optimized s8 depthwise convolution - * function with constraint that in_channel equals out_channel. - * @param[in] input_dims Input (activation) tensor dimensions. Format: [1, H, W, C_IN] - * Batch argument N is not used. - * @param[in] filter_dims Filter tensor dimensions. Format: [1, H, W, C_OUT] - * @return The function returns required buffer size in bytes - * - */ -int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims); - -/** - * @defgroup FC Fully-connected Layer Functions - * - * Collection of fully-connected and matrix multiplication functions. - * - * Fully-connected layer is basically a matrix-vector multiplication - * with bias. The matrix is the weights and the input/output vectors - * are the activation values. Supported {weight, activation} precisions - * include {8-bit, 8-bit}, {16-bit, 16-bit}, and {8-bit, 16-bit}. - * - * Here we have two types of kernel functions. The basic function - * implements the function using regular GEMV approach. The opt functions - * operates with weights in interleaved formats. - * - */ - -/** - *@brief Q7 basic fully-connected layer function - *@param[in] pV pointer to input vector - *@param[in] pM pointer to matrix weights - *@param[in] dim_vec length of the vector - *@param[in] num_of_rows number of rows in weight matrix - *@param[in] bias_shift amount of left-shift for bias - *@param[in] out_shift amount of right-shift for output - *@param[in] bias pointer to bias - *@param[in,out] pOut pointer to output vector - *@param[in,out] vec_buffer pointer to buffer space for input - *@return The function returns ARM_MATH_SUCCESS - * - */ - -arm_status arm_fully_connected_q7(const q7_t *pV, - const q7_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q7_t *pOut, - q15_t *vec_buffer); - -/** - * @brief Basic s8 Fully Connected function. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * @param[in] fc_params Fully Connected layer parameters. - * Range of fc_params->input_offset : [-127, 128] - * fc_params->filter_offset : 0 - * Range of fc_params->output_offset : [-128, 127] - * @param[in] quant_params Per-tensor quantization info. - * It contains the multiplier and shift values to be applied to the output tensor. - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * Input dimension is taken as Nx(H * W * C_IN) - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C] - * N : accumulation depth and equals (H * W * C_IN) from input_dims - * C : output depth and equals C_OUT in output_dims - * H & W : Not used - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * N, H, W : Not used - * @param[in] bias_data Bias data pointer. Data type: int32 - * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT] - * N : Batches - * C_OUT : Output depth - * H & W : Not used. - * @param[in, out] output_data Output data pointer. Data type: int8 - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - Supported framework: TensorFlow Lite - * - q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - */ -arm_status arm_fully_connected_s8(const cmsis_nn_context *ctx, - const cmsis_nn_fc_params *fc_params, - const cmsis_nn_per_tensor_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Get the required buffer size for S8 basic fully-connected and - * matrix multiplication layer function for TF Lite - * @param[in] filter_dims dimension of filter - * @return The function returns required buffer size in bytes - * - */ -int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims); - -/** - * @brief Basic s16 Fully Connected function. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * @param[in] fc_params Fully Connected layer parameters. - * fc_params->input_offset : 0 - * fc_params->filter_offset : 0 - * fc_params->output_offset : 0 - * @param[in] quant_params Per-tensor quantization info. - * It contains the multiplier and shift values to be applied to the output tensor. - * @param[in] input_dims Input (activation) tensor dimensions. Format: [N, H, W, C_IN] - * Input dimension is taken as Nx(H * W * C_IN) - * @param[in] input_data Input (activation) data pointer. Data type: int16 - * @param[in] filter_dims Two dimensional filter dimensions. Format: [N, C] - * N : accumulation depth and equals (H * W * C_IN) from input_dims - * C : output depth and equals C_OUT in output_dims - * H & W : Not used - * @param[in] filter_data Filter data pointer. Data type: int8 - * @param[in] bias_dims Bias tensor dimensions. Format: [C_OUT] - * N, H, W : Not used - * @param[in] bias_data Bias data pointer. Data type: int64 - * @param[in] output_dims Output tensor dimensions. Format: [N, C_OUT] - * N : Batches - * C_OUT : Output depth - * H & W : Not used. - * @param[in, out] output_data Output data pointer. Data type: int16 - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - Supported framework: TensorFlow Lite - * - q15 is used as data type eventhough it is s16 data. It is done so to be consistent with existing APIs. - */ -arm_status arm_fully_connected_s16(const cmsis_nn_context *ctx, - const cmsis_nn_fc_params *fc_params, - const cmsis_nn_per_tensor_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int64_t *bias_data, - const cmsis_nn_dims *output_dims, - q15_t *output_data); - -/** - * @brief Get the required buffer size for S16 basic fully-connected and - * matrix multiplication layer function for TF Lite - * @param[in] filter_dims dimension of filter - * @return The function returns required buffer size in bytes - * - */ -int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims); - -/** - * @brief Q7 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - -arm_status arm_fully_connected_q7_opt(const q7_t *pV, - const q7_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q7_t *pOut, - q15_t *vec_buffer); - -/** - * @brief Q15 basic fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - -arm_status arm_fully_connected_q15(const q15_t *pV, - const q15_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t *bias, - q15_t *pOut, - q15_t *vec_buffer); - -/** - * @brief Q15 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - -arm_status arm_fully_connected_q15_opt(const q15_t *pV, - const q15_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t *bias, - q15_t *pOut, - q15_t *vec_buffer); - -/** - * @brief Mixed Q15-Q7 fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - -arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV, - const q7_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q15_t *pOut, - q15_t *vec_buffer); - -/** - * @brief Mixed Q15-Q7 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - */ - -arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV, - const q7_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q15_t *pOut, - q15_t *vec_buffer); - -/** - * @brief Matrix-Multiplication Kernels for Convolution - * - * These functions are used within convolution layer functions for - * matrix multiplication. - * - * The implementation is similar to CMSIS-DSP arm_mat_mult functions - * with one Q7 and one Q15 operands. The Q15 operand is the im2col - * output which is always with 2 columns. - * - */ - -/** - * @brief Matrix-multiplication function for convolution - * @param[in] pA pointer to operand A - * @param[in] pInBuffer pointer to operand B, always conssists of 2 vectors - * @param[in] ch_im_out numRow of A - * @param[in] numCol_A numCol of A - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias the bias - * @param[in,out] pOut pointer to output - * @return The function returns the incremented output pointer - */ - -q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t *pA, - const q15_t *pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q7_t *pOut); - -#ifdef __cplusplus -} -#endif - -/* - * Other functions - * These layers are typically not timing critical - * Basic implementation is supported here - */ - -#ifdef __cplusplus -extern "C" { -#endif - -/** - * @defgroup BasicMath Basic math functions - * - * Elementwise add and multiplication functions. - * - */ - -/** - * @brief s8 elementwise add of two vectors - * @param[in] input_1_vect pointer to input vector 1 - * @param[in] input_2_vect pointer to input vector 2 - * @param[in] input_1_offset offset for input 1. Range: -127 to 128 - * @param[in] input_1_mult multiplier for input 1 - * @param[in] input_1_shift shift for input 1 - * @param[in] input_2_offset offset for input 2. Range: -127 to 128 - * @param[in] input_2_mult multiplier for input 2 - * @param[in] input_2_shift shift for input 2 - * @param[in] left_shift input left shift - * @param[in,out] output pointer to output vector - * @param[in] out_offset output offset. Range: -128 to 127 - * @param[in] out_mult output multiplier - * @param[in] out_shift output shift - * @param[in] out_activation_min minimum value to clamp output to. Min: -128 - * @param[in] out_activation_max maximum value to clamp output to. Max: 127 - * @param[in] block_size number of samples - * @return The function returns ARM_MATH_SUCCESS - */ -arm_status arm_elementwise_add_s8(const int8_t *input_1_vect, - const int8_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_1_mult, - const int32_t input_1_shift, - const int32_t input_2_offset, - const int32_t input_2_mult, - const int32_t input_2_shift, - const int32_t left_shift, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const int32_t block_size); - -/** - * @brief s16 elementwise add of two vectors - * @param[in] input_1_vect pointer to input vector 1 - * @param[in] input_2_vect pointer to input vector 2 - * @param[in] input_1_offset offset for input 1. Not used. - * @param[in] input_1_mult multiplier for input 1 - * @param[in] input_1_shift shift for input 1 - * @param[in] input_2_offset offset for input 2. Not used. - * @param[in] input_2_mult multiplier for input 2 - * @param[in] input_2_shift shift for input 2 - * @param[in] left_shift input left shift - * @param[in,out] output pointer to output vector - * @param[in] out_offset output offset. Not used. - * @param[in] out_mult output multiplier - * @param[in] out_shift output shift - * @param[in] out_activation_min minimum value to clamp output to. Min: -32768 - * @param[in] out_activation_max maximum value to clamp output to. Max: 32767 - * @param[in] block_size number of samples - * @return The function returns ARM_MATH_SUCCESS - */ -arm_status arm_elementwise_add_s16(const int16_t *input_1_vect, - const int16_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_1_mult, - const int32_t input_1_shift, - const int32_t input_2_offset, - const int32_t input_2_mult, - const int32_t input_2_shift, - const int32_t left_shift, - int16_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const int32_t block_size); - -/** - * @brief s8 elementwise multiplication - * @param[in] input_1_vect pointer to input vector 1 - * @param[in] input_2_vect pointer to input vector 2 - * @param[in] input_1_offset offset for input 1. Range: -127 to 128 - * @param[in] input_2_offset offset for input 2. Range: -127 to 128 - * @param[in,out] output pointer to output vector - * @param[in] out_offset output offset. Range: -128 to 127 - * @param[in] out_mult output multiplier - * @param[in] out_shift output shift - * @param[in] out_activation_min minimum value to clamp output to. Min: -128 - * @param[in] out_activation_max maximum value to clamp output to. Max: 127 - * @param[in] block_size number of samples - * @return The function returns ARM_MATH_SUCCESS - * - * @details Supported framework: TensorFlow Lite micro - */ -arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect, - const int8_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_2_offset, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const int32_t block_size); - -/** - * @brief s16 elementwise multiplication - * @param[in] input_1_vect pointer to input vector 1 - * @param[in] input_2_vect pointer to input vector 2 - * @param[in] input_1_offset offset for input 1. Not used. - * @param[in] input_2_offset offset for input 2. Not used. - * @param[in,out] output pointer to output vector - * @param[in] out_offset output offset. Not used. - * @param[in] out_mult output multiplier - * @param[in] out_shift output shift - * @param[in] out_activation_min minimum value to clamp output to. Min: -32768 - * @param[in] out_activation_max maximum value to clamp output to. Max: 32767 - * @param[in] block_size number of samples - * @return The function returns ARM_MATH_SUCCESS - * - * @details Supported framework: TensorFlow Lite micro - */ -arm_status arm_elementwise_mul_s16(const int16_t *input_1_vect, - const int16_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_2_offset, - int16_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const int32_t block_size); - -/** - * @defgroup Acti Activation Functions - * - * Perform activation layers, including ReLU (Rectified Linear Unit), - * sigmoid and tanh - * - */ - -/** - * @brief Q7 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @return none. - */ - -void arm_relu_q7(q7_t *data, uint16_t size); - -/** - * @brief s8 ReLU6 function - * @param[in,out] data pointer to input - * @param[in] size number of elements - */ - -void arm_relu6_s8(q7_t *data, uint16_t size); - -/** - * @brief Q15 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @return none. - */ - -void arm_relu_q15(q15_t *data, uint16_t size); - -/** - * @brief Q7 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * @return none. - */ - -void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type); - -/** - * @brief Q15 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * @return none. - * - * @details - * - * This is the direct table look-up approach. - * - * Assume here the integer part of the fixed-point is <= 3. - * More than 3 just not making much sense, makes no difference with - * saturation followed by any of these activation functions. - */ - -void arm_nn_activations_direct_q15(q15_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type); - -/** - * @defgroup Pooling Pooling Functions - * - * Perform pooling functions, including max pooling and average pooling - * - */ - -/** - * @brief Q7 max pooling function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * @return none. - * - */ - -void arm_maxpool_q7_HWC(q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t *bufferA, - q7_t *Im_out); - -/** - * @brief Q7 average pooling function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * @return none. - * - */ - -void arm_avepool_q7_HWC(q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t *bufferA, - q7_t *Im_out); - -/** - * @brief s8 average pooling function. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * @param[in] pool_params Pooling parameters - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Argument 'N' is not used. - * @param[in] input_data Input (activation) data pointer. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] - * Argument N and C are not used. - * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] - * Argument N is not used. - * C_OUT equals C_IN. - * @param[in, out] output_data Output data pointer. Data type: int8 - * @return The function returns - * ARM_MATH_SUCCESS - Successful operation - * - * @details - * - Supported Framework: TensorFlow Lite - * - */ -arm_status arm_avgpool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief Get the required buffer size for S8 average pooling function - * @param[in] dim_dst_width output tensor dimension - * @param[in] ch_src number of input tensor channels - * @return The function returns required buffer size in bytes - * - */ -int32_t arm_avgpool_s8_get_buffer_size(const int dim_dst_width, const int ch_src); - -/** - * @brief s16 average pooling function. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * @param[in] pool_params Pooling parameters - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Argument 'N' is not used. - * @param[in] input_data Input (activation) data pointer. Data type: int16 - * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] - * Argument N and C are not used. - * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] - * Argument N is not used. - * C_OUT equals C_IN. - * @param[in, out] output_data Output data pointer. Data type: int16 - * @return The function returns - * ARM_MATH_SUCCESS - Successful operation - * - * @details - * - Supported Framework: TensorFlow Lite - * - */ -arm_status arm_avgpool_s16(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const int16_t *input_data, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - int16_t *output_data); - -/** - * @brief Get the required buffer size for S16 average pooling function - * @param[in] dim_dst_width output tensor dimension - * @param[in] ch_src number of input tensor channels - * @return The function returns required buffer size in bytes - * - */ -int32_t arm_avgpool_s16_get_buffer_size(const int dim_dst_width, const int ch_src); - -/** - * @brief s8 max pooling function. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * @param[in] pool_params Pooling parameters - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Argument 'N' is not used. - * @param[in] input_data Input (activation) data pointer. The input tensor must not - * overlap with the output tensor. Data type: int8 - * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] - * Argument N and C are not used. - * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] - * Argument N is not used. - * C_OUT equals C_IN. - * @param[in, out] output_data Output data pointer. Data type: int8 - * @return The function returns - * ARM_MATH_SUCCESS - Successful operation - * - * @details - * - Supported Framework: TensorFlow Lite - * - */ -arm_status arm_max_pool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief s16 max pooling function. - * - * @param[in, out] ctx Function context (e.g. temporary buffer). Check the function - * definition file to see if an additional buffer is required. - * Optional function {API}_get_buffer_size() provides the buffer - * size if an additional buffer is required. - * @param[in] pool_params Pooling parameters - * @param[in] input_dims Input (activation) tensor dimensions. Format: [H, W, C_IN] - * Argument 'N' is not used. - * @param[in] src Input (activation) data pointer. The input tensor must not - * overlap with the output tensor. Data type: int16 - * @param[in] filter_dims Filter tensor dimensions. Format: [H, W] - * Argument N and C are not used. - * @param[in] output_dims Output tensor dimensions. Format: [H, W, C_OUT] - * Argument N is not used. - * C_OUT equals C_IN. - * @param[in, out] dst Output data pointer. Data type: int16 - * @return The function returns - * ARM_MATH_SUCCESS - Successful operation - * - * @details - * - Supported Framework: TensorFlow Lite - * - */ -arm_status arm_max_pool_s16(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const int16_t *src, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - int16_t *dst); - -/** - * @defgroup Softmax Softmax Functions - * - * EXP(2) based softmax functions. - * - */ - -/** - * @brief Q7 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimension - * @param[out] p_out pointer to output vector - * - * @note This function is an optimized version which is not bit-accurate with - * TensorFlow Lite's kernel - * - */ - -void arm_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out); - -/** - * @brief Q7 softmax function with batch parameter - * @param[in] vec_in pointer to input vector - * @param[in] nb_batches number of batches - * @param[in] dim_vec input vector dimension - * @param[out] p_out pointer to output vector - * @return none. - * - * @note This function is an optimized version which is not bit-accurate with - * TensorFlow Lite's kernel - * - */ - -void arm_softmax_with_batch_q7(const q7_t *vec_in, const uint16_t nb_batches, const uint16_t dim_vec, q7_t *p_out); -/** - * @brief Q15 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimension - * @param[out] p_out pointer to output vector - * @return none. - * - * @note This function is an optimized version which is not bit-accurate with - * TensorFlow Lite's kernel - * - */ - -void arm_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out); - -/** - * @brief S8 softmax function - * @param[in] input Pointer to the input tensor - * @param[in] num_rows Number of rows in the input tensor - * @param[in] row_size Number of elements in each input row - * @param[in] mult Input quantization multiplier - * @param[in] shift Input quantization shift within the range [0, 31] - * @param[in] diff_min Minimum difference with max in row. Used to check if - * the quantized exponential operation can be performed - * @param[out] output Pointer to the output tensor - * - * @note Supported framework: TensorFlow Lite micro (bit-accurate) - * - */ -void arm_softmax_s8(const int8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int8_t *output); - -/** - * @brief S8 to s16 softmax function - * @param[in] input Pointer to the input tensor - * @param[in] num_rows Number of rows in the input tensor - * @param[in] row_size Number of elements in each input row - * @param[in] mult Input quantization multiplier - * @param[in] shift Input quantization shift within the range [0, 31] - * @param[in] diff_min Minimum difference with max in row. Used to check if - * the quantized exponential operation can be performed - * @param[out] output Pointer to the output tensor - * - * @note Supported framework: TensorFlow Lite micro (bit-accurate) - * - */ -void arm_softmax_s8_s16(const int8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int16_t *output); - -/** - * @brief S16 softmax function - * @param[in] input Pointer to the input tensor - * @param[in] num_rows Number of rows in the input tensor - * @param[in] row_size Number of elements in each input row - * @param[in] mult Input quantization multiplier - * @param[in] shift Input quantization shift within the range [0, 31] - * @param[in] softmax_params Softmax s16 layer parameters with two pointers to LUTs speficied below. - * For indexing the high 9 bits are used and 7 remaining for interpolation. - * That means 512 entries for the 9-bit indexing and 1 extra for interpolation, i.e. 513 - * values for each LUT. - * - Lookup table for exp(x), where x uniform distributed between [-10.0 , 0.0] - * - Lookup table for 1 / (1 + x), where x uniform distributed between [0.0 , 1.0] - * @param[out] output Pointer to the output tensor - * @return The function returns - * ARM_MATH_ARGUMENT_ERROR if LUTs are NULL - * ARM_MATH_SUCCESS - Successful operation - * - * @note Supported framework: TensorFlow Lite micro (bit-accurate) - * - */ -arm_status arm_softmax_s16(const int16_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const cmsis_nn_softmax_lut_s16 *softmax_params, - int16_t *output); - -/** - * @brief U8 softmax function - * @param[in] input Pointer to the input tensor - * @param[in] num_rows Number of rows in the input tensor - * @param[in] row_size Number of elements in each input row - * @param[in] mult Input quantization multiplier - * @param[in] shift Input quantization shift within the range [0, 31] - * @param[in] diff_min Minimum difference with max in row. Used to check if - * the quantized exponential operation can be performed - * @param[out] output Pointer to the output tensor - * - * @note Supported framework: TensorFlow Lite micro (bit-accurate) - * - */ - -void arm_softmax_u8(const uint8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - uint8_t *output); - -/** - * @brief uint8 depthwise convolution function with asymmetric quantization - * Unless specified otherwise, arguments are mandatory. - * - * @param[in] input Pointer to input tensor - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_ch Channels in input tensor - * @param[in] kernel Pointer to kernel weights - * @param[in] kernel_x Width of kernel - * @param[in] kernel_y Height of kernel - * @param[in] ch_mult Number of channel multiplier - * @param[in] pad_x Padding sizes x - * @param[in] pad_y Padding sizes y - * @param[in] stride_x stride along the width - * @param[in] stride_y stride along the height - * @param[in] dilation_x Dilation along width. Not used and intended for future enhancement. - * @param[in] dilation_y Dilation along height. Not used and intended for future enhancement. - * @param[in] bias Pointer to optional bias values. If no bias is - * availble, NULL is expected - * @param[in] input_offset Input tensor zero offset - * @param[in] filter_offset Kernel tensor zero offset - * @param[in] output_offset Output tensor zero offset - * @param[in,out] output Pointer to output tensor - * @param[in] output_x Width of output tensor - * @param[in] output_y Height of output tensor - * @param[in] output_activation_min Minimum value to clamp the output to. Range : {0, 255} - * @param[in] output_activation_max Minimum value to clamp the output to. Range : {0, 255} - * @param[in] out_shift Amount of right-shift for output - * @param[in] out_mult Output multiplier for requantization - * @return The function returns the following - * ARM_MATH_SUCCESS - Successful operation - * - */ -arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_ch, - const uint8_t *kernel, - const uint16_t kernel_x, - const uint16_t kernel_y, - const int16_t ch_mult, - const int16_t pad_x, - const int16_t pad_y, - const int16_t stride_x, - const int16_t stride_y, - const int16_t dilation_x, - const int16_t dilation_y, - const int32_t *bias, - const int32_t input_offset, - const int32_t filter_offset, - const int32_t output_offset, - uint8_t *output, - const uint16_t output_x, - const uint16_t output_y, - const int32_t output_activation_min, - const int32_t output_activation_max, - const int32_t out_shift, - const int32_t out_mult); - -/** - * @defgroup Reshape Reshape Functions - * - */ - -/** - * @brief Reshape a s8 vector into another with different shape - * @param[in] input points to the s8 input vector - * @param[out] output points to the s8 output vector - * @param[in] total_size total size of the input and output vectors in bytes - * - * @note The output is expected to be in a memory area that does not overlap with the input's - * - */ -void arm_reshape_s8(const int8_t *input, int8_t *output, const uint32_t total_size); - -/** - * @defgroup Concatenation Concatenation Functions - * - */ - -/** - * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the X axis - * This function should be called for each input tensor to concatenate. The argument offset_x - * will be used to store the input tensor in the correct position in the output tensor - * - * i.e. offset_x = 0 - * for(i = 0 i < num_input_tensors; ++i) - * { - * arm_concatenation_s8_x(&input[i], ..., &output, ..., ..., offset_x) - * offset_x += input_x[i] - * } - * - * This function assumes that the output tensor has: - * -# The same height of the input tensor - * -# The same number of channels of the input tensor - * -# The same batch size of the input tensor - * - * Unless specified otherwise, arguments are mandatory. - * - * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it - * does not involve any arithmetic operation - * - * @param[in] input Pointer to input tensor. Input tensor must not overlap with the output tensor. - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_z Channels in input tensor - * @param[in] input_w Batch size in input tensor - * @param[out] output Pointer to output tensor. Expected to be at least - * (input_x * input_y * input_z * input_w) + offset_x - * bytes. - * @param[in] output_x Width of output tensor - * @param[in] offset_x The offset (in number of elements) on the X axis to start concatenating the input tensor - * It is user responsibility to provide the correct value - * - * Input constraints - * offset_x is less than output_x - * - */ -void arm_concatenation_s8_x(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_x, - const uint32_t offset_x); - -/** - * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Y axis - * This function should be called for each input tensor to concatenate. The argument offset_y - * will be used to store the input tensor in the correct position in the output tensor - * - * i.e. offset_y = 0 - * for(i = 0 i < num_input_tensors; ++i) - * { - * arm_concatenation_s8_y(&input[i], ..., &output, ..., ..., offset_y) - * offset_y += input_y[i] - * } - * - * This function assumes that the output tensor has: - * -# The same width of the input tensor - * -# The same number of channels of the input tensor - * -# The same batch size of the input tensor - * - * Unless specified otherwise, arguments are mandatory. - * - * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it - * does not involve any arithmetic operation - * - * @param[in] input Pointer to input tensor. Input tensor must not overlap with the output tensor. - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_z Channels in input tensor - * @param[in] input_w Batch size in input tensor - * @param[out] output Pointer to output tensor. Expected to be at least - * (input_z * input_w * input_x * input_y) + offset_y - * bytes. - * @param[in] output_y Height of output tensor - * @param[in] offset_y The offset on the Y axis to start concatenating the input tensor - * It is user responsibility to provide the correct value - * - * Input constraints - * offset_y is less than output_y - * - */ -void arm_concatenation_s8_y(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_y, - const uint32_t offset_y); - -/** - * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the Z axis - * This function should be called for each input tensor to concatenate. The argument offset_z - * will be used to store the input tensor in the correct position in the output tensor - * - * i.e. offset_z = 0 - * for(i = 0 i < num_input_tensors; ++i) - * { - * arm_concatenation_s8_z(&input[i], ..., &output, ..., ..., offset_z) - * offset_z += input_z[i] - * } - * - * This function assumes that the output tensor has: - * -# The same width of the input tensor - * -# The same height of the input tensor - * -# The same batch size of the input tensor - * - * Unless specified otherwise, arguments are mandatory. - * - * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it - * does not involve any arithmetic operation - * - * @param[in] input Pointer to input tensor. Input tensor must not overlap with output tensor. - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_z Channels in input tensor - * @param[in] input_w Batch size in input tensor - * @param[out] output Pointer to output tensor. Expected to be at least - * (input_x * input_y * input_z * input_w) + offset_z - * bytes. - * @param[in] output_z Channels in output tensor - * @param[in] offset_z The offset on the Z axis to start concatenating the input tensor - * It is user responsibility to provide the correct value - * - * Input constraints - * offset_z is less than output_z - * - */ -void arm_concatenation_s8_z(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_z, - const uint32_t offset_z); - -/** - * @brief int8/uint8 concatenation function to be used for concatenating N-tensors along the W axis (Batch size) - * This function should be called for each input tensor to concatenate. The argument offset_w - * will be used to store the input tensor in the correct position in the output tensor - * - * i.e. offset_w = 0 - * for(i = 0 i < num_input_tensors; ++i) - * { - * arm_concatenation_s8_w(&input[i], ..., &output, ..., ..., offset_w) - * offset_w += input_w[i] - * } - * - * This function assumes that the output tensor has: - * -# The same width of the input tensor - * -# The same height of the input tensor - * -# The same number o channels of the input tensor - * - * Unless specified otherwise, arguments are mandatory. - * - * @note This function, data layout independent, can be used to concatenate either int8 or uint8 tensors because it - * does not involve any arithmetic operation - * - * @param[in] input Pointer to input tensor - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_z Channels in input tensor - * @param[in] input_w Batch size in input tensor - * @param[out] output Pointer to output tensor. Expected to be at least - * input_x * input_y * input_z * input_w - * bytes. - * @param[in] offset_w The offset on the W axis to start concatenating the input tensor - * It is user responsibility to provide the correct value - * - */ -void arm_concatenation_s8_w(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint32_t offset_w); -/** - * @defgroup SVDF SVDF Layer Functions - * - */ - -/** - * @brief s8 SVDF function with 8 bit state tensor and 8 bit time weights - * - * @param[in] input_ctx Temporary scratch buffer - * @param[in] output_ctx Temporary output scratch buffer - * @param[in] svdf_params SVDF Parameters - * Range of svdf_params->input_offset : [-128, 127] - * Range of svdf_params->output_offset : [-128, 127] - * @param[in] input_quant_params Input quantization parameters - * @param[in] output_quant_params Output quantization parameters - * @param[in] input_dims Input tensor dimensions - * @param[in] input_data Pointer to input tensor - * @param[in] state_dims State tensor dimensions - * @param[in] state_data Pointer to state tensor - * @param[in] weights_feature_dims Weights (feature) tensor dimensions - * @param[in] weights_feature_data Pointer to the weights (feature) tensor - * @param[in] weights_time_dims Weights (time) tensor dimensions - * @param[in] weights_time_data Pointer to the weights (time) tensor - * @param[in] bias_dims Bias tensor dimensions - * @param[in] bias_data Pointer to bias tensor - * @param[in] output_dims Output tensor dimensions - * @param[out] output_data Pointer to the output tensor - * - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * 1. Supported framework: TensorFlow Lite micro - * 2. q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - * - */ -arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx, - const cmsis_nn_context *output_ctx, - const cmsis_nn_svdf_params *svdf_params, - const cmsis_nn_per_tensor_quant_params *input_quant_params, - const cmsis_nn_per_tensor_quant_params *output_quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *state_dims, - q7_t *state_data, - const cmsis_nn_dims *weights_feature_dims, - const q7_t *weights_feature_data, - const cmsis_nn_dims *weights_time_dims, - const q7_t *weights_time_data, - const cmsis_nn_dims *bias_dims, - const q31_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -/** - * @brief s8 SVDF function with 16 bit state tensor and 16 bit time weights - * - * @param[in] input_ctx Temporary scratch buffer - * @param[in] output_ctx Temporary output scratch buffer - * @param[in] svdf_params SVDF Parameters - * Range of svdf_params->input_offset : [-128, 127] - * Range of svdf_params->output_offset : [-128, 127] - * @param[in] input_quant_params Input quantization parameters - * @param[in] output_quant_params Output quantization parameters - * @param[in] input_dims Input tensor dimensions - * @param[in] input_data Pointer to input tensor - * @param[in] state_dims State tensor dimensions - * @param[in] state_data Pointer to state tensor - * @param[in] weights_feature_dims Weights (feature) tensor dimensions - * @param[in] weights_feature_data Pointer to the weights (feature) tensor - * @param[in] weights_time_dims Weights (time) tensor dimensions - * @param[in] weights_time_data Pointer to the weights (time) tensor - * @param[in] bias_dims Bias tensor dimensions - * @param[in] bias_data Pointer to bias tensor - * @param[in] output_dims Output tensor dimensions - * @param[out] output_data Pointer to the output tensor - * - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * 1. Supported framework: TensorFlow Lite micro - * 2. q7 is used as data type eventhough it is s8 data. It is done so to be consistent with existing APIs. - * - */ -arm_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx, - const cmsis_nn_context *output_ctx, - const cmsis_nn_svdf_params *svdf_params, - const cmsis_nn_per_tensor_quant_params *input_quant_params, - const cmsis_nn_per_tensor_quant_params *output_quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *state_dims, - q15_t *state_data, - const cmsis_nn_dims *weights_feature_dims, - const q7_t *weights_feature_data, - const cmsis_nn_dims *weights_time_dims, - const q15_t *weights_time_data, - const cmsis_nn_dims *bias_dims, - const q31_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nnsupportfunctions.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nnsupportfunctions.h deleted file mode 100644 index 4b50564..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Include/arm_nnsupportfunctions.h +++ /dev/null @@ -1,1186 +0,0 @@ -/* - * Copyright (C) 2010-2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nnsupportfunctions.h - * Description: Public header file of support functions for CMSIS NN Library - * - * $Date: 19. April 2022 - * $Revision: V.7.0.1 - * - * Target Processor: Cortex-M CPUs - * -------------------------------------------------------------------- */ - -#ifndef _ARM_NNSUPPORTFUNCTIONS_H_ -#define _ARM_NNSUPPORTFUNCTIONS_H_ - -#include "arm_nn_math_types.h" -#include "arm_nn_types.h" - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#define LEFT_SHIFT(_shift) (_shift > 0 ? _shift : 0) -#define RIGHT_SHIFT(_shift) (_shift > 0 ? 0 : -_shift) -#define MASK_IF_ZERO(x) (x) == 0 ? ~0 : 0 -#define MASK_IF_NON_ZERO(x) (x) != 0 ? ~0 : 0 -#define SELECT_USING_MASK(mask, a, b) ((mask) & (a)) ^ (~(mask) & (b)) - -#define MAX(A, B) ((A) > (B) ? (A) : (B)) -#define MIN(A, B) ((A) < (B) ? (A) : (B)) -#define CLAMP(x, h, l) MAX(MIN((x), (h)), (l)) -#define REDUCE_MULTIPLIER(_mult) ((_mult < 0x7FFF0000) ? ((_mult + (1 << 15)) >> 16) : 0x7FFF) - -/** - * @brief definition to pack four 8 bit values. - */ -#define PACK_Q7x4_32x1(v0, v1, v2, v3) \ - ((((int32_t)(v0) << 0) & (int32_t)0x000000FF) | (((int32_t)(v1) << 8) & (int32_t)0x0000FF00) | \ - (((int32_t)(v2) << 16) & (int32_t)0x00FF0000) | (((int32_t)(v3) << 24) & (int32_t)0xFF000000)) - -/** - * @brief Union for SIMD access of q31/q15/q7 types - */ -union arm_nnword -{ - q31_t word; - /**< q31 type */ - q15_t half_words[2]; - /**< q15 type */ - q7_t bytes[4]; - /**< q7 type */ -}; - -/** - * @brief Union for data type long long - */ -struct arm_nn_double -{ - uint32_t low; - int32_t high; -}; - -union arm_nn_long_long -{ - int64_t long_long; - struct arm_nn_double word; -}; - -/** - * @defgroup nndata_convert Neural Network Data Conversion Functions - * - * Perform data type conversion in-between neural network operations - * - */ - -/** - * @brief Converts the elements of the q7 vector to q15 vector without left-shift - * @param[in] *pSrc points to the q7 input vector - * @param[out] *pDst points to the q15 output vector - * @param[in] blockSize length of the input vector - * - */ -void arm_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize); - -/** - * @brief Non-saturating addition of elements of a q7 vector - * @param[in] *input Pointer to the q7 input vector - * @param[out] *output Pointer to the q31 output variable. - * @param[in] block_size length of the input vector - * \par Description: - * - * 2^24 samples can be added without saturating the result. - * - * The equation used for the conversion process is: - * - *
- *  sum = input[0] + input[1] + .. + input[block_size -1]
- * 
- * - * */ -void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size); - -/** - * @brief Converts the elements of the q7 vector to reordered q15 vector without left-shift - * @param[in] *pSrc points to the q7 input vector - * @param[out] *pDst points to the q15 output vector - * @param[in] blockSize length of the input vector - * @return none. - * - */ -void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize); - -/** - * @brief Converts the elements from a q7 vector to a q15 vector with an added offset - * @param[in] src pointer to the q7 input vector - * @param[out] dst pointer to the q15 output vector - * @param[in] block_size length of the input vector - * @param[in] offset q7 offset to be added to each input vector element. - * - * \par Description: - * - * The equation used for the conversion process is: - * - *
- *  dst[n] = (q15_t) src[n] + offset;   0 <= n < block_size.
- * 
- * - */ -void arm_q7_to_q15_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset); - -/** - * @brief Converts the elements of the q7 vector to reordered q15 vector with an added offset - * @param[in] src pointer to the q7 input vector - * @param[out] dst pointer to the q15 output vector - * @param[in] block_size length of the input vector - * @param[in] offset offset to be added to each input vector element. - * @return none. - * - * @details This function does the q7 to q15 expansion with re-ordering of bytes. Re-ordering is a consequence of - * the sign extension intrinsic(DSP extension). The tail (i.e., last (N % 4) elements) retains its - * original order. - * - */ -void arm_q7_to_q15_reordered_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset); - -/** - * @brief Converts the elements from a q7 vector and accumulate to a q15 vector - * @param[in] *src points to the q7 input vector - * @param[out] *dst points to the q15 output vector - * @param[in] block_size length of the input vector - * - * \par Description: - * - * The equation used for the conversion process is: - * - *
- *  dst[n] += (q15_t) src[n] ;   0 <= n < block_size.
- * 
- * - */ -void arm_nn_accumulate_q7_to_q15(q15_t *dst, const q7_t *src, uint32_t block_size); - -/** - * @brief Depthwise conv on an im2col buffer where the input channel equals output channel. - * @param[in] row pointer to row - * @param[in] col pointer to im2col buffer, always consists of 2 columns. - * @param[in] num_ch number of channels - * @param[in] out_shift pointer to per output channel requantization shift parameter. - * @param[in] out_mult pointer to per output channel requantization multiplier parameter. - * @param[in] out_offset output tensor offset. - * @param[in] activation_min minimum value to clamp the output to. Range : int8 - * @param[in] activation_max maximum value to clamp the output to. Range : int8 - * @param[in] kernel_size number of elements in one column. - * @param[in] output_bias per output channel bias. Range : int32 - * @param[out] out pointer to output - * @return The function returns one of the two - * 1. The incremented output pointer for a successful operation or - * 2. NULL if implementation is not available. - * - * @details Supported framework: TensorFlow Lite micro. - */ -q7_t *arm_nn_depthwise_conv_s8_core(const q7_t *row, - const q15_t *col, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t kernel_size, - const int32_t *const output_bias, - q7_t *out); - -/** - * @brief General Matrix-multiplication function with per-channel requantization. - * @param[in] input_row pointer to row operand - * @param[in] input_col pointer to col operand - * @param[in] output_ch number of rows of input_row - * @param[in] col_batches number of column batches. Range: 1 to 4 - * @param[in] output_shift pointer to per output channel requantization shift parameter. - * @param[in] output_mult pointer to per output channel requantization multiplier parameter. - * @param[in] out_offset output tensor offset. - * @param[in] col_offset input tensor(col) offset. - * @param[in] row_offset kernel offset(row). Not used. - * @param[in] out_activation_min minimum value to clamp the output to. Range : int8 - * @param[in] out_activation_max maximum value to clamp the output to. Range : int8 - * @param[in] row_len number of elements in each row - * @param[in] bias per output channel bias. Range : int32 - * @param[in,out] out pointer to output - * @return The function returns one of the two - * 1. The incremented output pointer for a successful operation or - * 2. NULL if implementation is not available. - * - * @details Supported framework: TensorFlow Lite - */ -q7_t *arm_nn_mat_mult_s8(const q7_t *input_row, - const q7_t *input_col, - const uint16_t output_ch, - const uint16_t col_batches, - const int32_t *output_shift, - const int32_t *output_mult, - const int32_t out_offset, - const int32_t col_offset, - const int32_t row_offset, - const int16_t out_activation_min, - const int16_t out_activation_max, - const uint16_t row_len, - const int32_t *const bias, - q7_t *out); -/** - * @brief Matrix-multiplication function for convolution with per-channel requantization for 16 bits convolution. - * @param[in] input_a pointer to operand A - * @param[in] input_b pointer to operand B, always consists of 2 vectors. - * @param[in] output_ch number of rows of A - * @param[in] out_shift pointer to per output channel requantization shift parameter. - * @param[in] out_mult pointer to per output channel requantization multiplier parameter. - * @param[in] activation_min minimum value to clamp the output to. Range : int16 - * @param[in] activation_max maximum value to clamp the output to. Range : int16 - * @param[in] num_col_a number of columns of A - * @param[in] output_bias per output channel bias. Range : int64 - * @param[in,out] out_0 pointer to output - * @return The function returns one of the two - * 1. The incremented output pointer for a successful operation or - * 2. NULL if implementation is not available. - * - * @details This function does the matrix multiplication of weight matrix for all output channels - * with 2 columns from im2col and produces two elements/output_channel. The outputs are - * clamped in the range provided by activation min and max. - * Supported framework: TensorFlow Lite micro. - */ -q15_t *arm_nn_mat_mult_kernel_s16(const q7_t *input_a, - const q15_t *input_b, - const int32_t output_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int16_t activation_min, - const int16_t activation_max, - const int32_t num_col_a, - const int64_t *const output_bias, - q15_t *out_0); -/** - * @brief General Matrix-multiplication without requantization for one row & one column - * @param[in] row_elements number of row elements - * @param[in] row_base pointer to row operand - * @param[in] col_base pointer to col operand - * @param[out] sum_col pointer to store sum of column elements - * @param[out] output pointer to store result of multiply-accumulate - * @return The function returns the multiply-accumulated result of the row by column. - * - * @details Pseudo-code - * *output = 0 - * sum_col = 0 - * for (i = 0; i < row_elements; i++) - * *output += row_base[i] * col_base[i] - * sum_col += col_base[i] - * - */ -arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements, - const int8_t *row_base, - const int8_t *col_base, - int32_t *const sum_col, - int32_t *const output); - -/** - * @brief Matrix-multiplication with requantization & activation function for four rows and one column - * @param[in] row_elements number of row elements - * @param[in] offset offset between rows. Can be the same as row_elements. - * For e.g, in a 1x1 conv scenario with stride as 1. - * @param[in] row_base pointer to row operand - * @param[in] col_base pointer to col operand - * @param[in] out_ch Number of output channels - * @param[in] conv_params Pointer to convolution parameters like offsets and activation values - * @param[in] quant_params Pointer to per-channel quantization parameters - * @param[in] bias Pointer to per-channel bias - * @param[out] output Pointer to output where int8 results are stored. - * - * @return The function returns the updated output pointer or NULL if implementation is not available. - * - * @details Compliant to TFLM int8 specification. MVE implementation only - */ -int8_t *arm_nn_mat_mul_core_4x_s8(const int32_t row_elements, - const int32_t offset, - const int8_t *row_base, - const int8_t *col_base, - const int32_t out_ch, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const int32_t *bias, - int8_t *output); - -/** - * @brief General Matrix-multiplication function with per-channel requantization. - * This function assumes: - * - LHS input matrix NOT transposed (nt) - * - RHS input matrix transposed (t) - * - * @note This operation also performs the broadcast bias addition before the requantization - * - * @param[in] lhs Pointer to the LHS input matrix - * @param[in] rhs Pointer to the RHS input matrix - * @param[in] bias Pointer to the bias vector. The length of this vector is equal to the number of - * output columns (or RHS input rows) - * @param[out] dst Pointer to the output matrix with "m" rows and "n" columns - * @param[in] dst_multipliers Pointer to the multipliers vector needed for the per-channel requantization. - * The length of this vector is equal to the number of output columns (or RHS input - * rows) - * @param[in] dst_shifts Pointer to the shifts vector needed for the per-channel requantization. The length - * of this vector is equal to the number of output columns (or RHS input rows) - * @param[in] lhs_rows Number of LHS input rows - * @param[in] rhs_rows Number of RHS input rows - * @param[in] rhs_cols Number of LHS/RHS input columns - * @param[in] lhs_offset Offset to be applied to the LHS input value - * @param[in] dst_offset Offset to be applied the output result - * @param[in] activation_min Minimum value to clamp down the output. Range : int8 - * @param[in] activation_max Maximum value to clamp up the output. Range : int8 - * - * @return The function returns ARM_MATH_SUCCESS - * - */ -arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs, - const q7_t *rhs, - const q31_t *bias, - q7_t *dst, - const int32_t *dst_multipliers, - const int32_t *dst_shifts, - const int32_t lhs_rows, - const int32_t rhs_rows, - const int32_t rhs_cols, - const int32_t lhs_offset, - const int32_t dst_offset, - const int32_t activation_min, - const int32_t activation_max); - -/** - * @brief s8 Vector by Matrix (transposed) multiplication - * - * @param[in] lhs Input left-hand side vector - * @param[in] rhs Input right-hand side matrix (transposed) - * @param[in] bias Input bias - * @param[out] dst Output vector - * @param[in] lhs_offset Offset to be added to the input values of the left-hand side vector. - * Range: -127 to 128 - * @param[in] rhs_offset Not used - * @param[in] dst_offset Offset to be added to the output values. Range: -127 to 128 - * @param[in] dst_multiplier Output multiplier - * @param[in] dst_shift Output shift - * @param[in] rhs_cols Number of columns in the right-hand side input matrix - * @param[in] rhs_rows Number of rows in the right-hand side input matrix - * @param[in] activation_min Minimum value to clamp the output to. Range: int8 - * @param[in] activation_max Maximum value to clamp the output to. Range: int8 - * @param[in] address_offset Memory position offset for dst. First output is stored at 'dst', the - * second at 'dst + address_offset' and so on. Default value is typically 1. - * - * @return The function returns ARM_MATH_SUCCESS - * - */ -arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs, - const q7_t *rhs, - const q31_t *bias, - q7_t *dst, - const int32_t lhs_offset, - const int32_t rhs_offset, - const int32_t dst_offset, - const int32_t dst_multiplier, - const int32_t dst_shift, - const int32_t rhs_cols, - const int32_t rhs_rows, - const int32_t activation_min, - const int32_t activation_max, - const int32_t address_offset); - -/** - * @brief s16 Vector by Matrix (transposed) multiplication - * - * @param[in] lhs Input left-hand side vector - * @param[in] rhs Input right-hand side matrix (transposed) - * @param[in] bias Input bias - * @param[out] dst Output vector - * @param[in] dst_multiplier Output multiplier - * @param[in] dst_shift Output shift - * @param[in] rhs_cols Number of columns in the right-hand side input matrix - * @param[in] rhs_rows Number of rows in the right-hand side input matrix - * @param[in] activation_min Minimum value to clamp the output to. Range: int16 - * @param[in] activation_max Maximum value to clamp the output to. Range: int16 - * - * @return The function returns ARM_MATH_SUCCESS - * - */ -arm_status arm_nn_vec_mat_mult_t_s16(const q15_t *lhs, - const q7_t *rhs, - const q63_t *bias, - q15_t *dst, - const int32_t dst_multiplier, - const int32_t dst_shift, - const int32_t rhs_cols, - const int32_t rhs_rows, - const int32_t activation_min, - const int32_t activation_max); - -/** - * @brief s8 Vector by Matrix (transposed) multiplication with s16 output - * - * @param[in] lhs Input left-hand side vector - * @param[in] rhs Input right-hand side matrix (transposed) - * @param[out] dst Output vector - * @param[in] lhs_offset Offset to be added to the input values of the left-hand side - * vector. Range: -127 to 128 - * @param[in] rhs_offset Not used - * @param[in] scatter_offset Address offset for dst. First output is stored at 'dst', the - * second at 'dst + scatter_offset' and so on. - * @param[in] dst_multiplier Output multiplier - * @param[in] dst_shift Output shift - * @param[in] rhs_cols Number of columns in the right-hand side input matrix - * @param[in] rhs_rows Number of rows in the right-hand side input matrix - * @param[in] activation_min Minimum value to clamp the output to. Range: int16 - * @param[in] activation_max Maximum value to clamp the output to. Range: int16 - * - * @return The function returns ARM_MATH_SUCCESS - * - */ -arm_status arm_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs, - const q7_t *rhs, - q15_t *dst, - const int32_t lhs_offset, - const int32_t rhs_offset, - const int32_t scatter_offset, - const int32_t dst_multiplier, - const int32_t dst_shift, - const int32_t rhs_cols, - const int32_t rhs_rows, - const int32_t activation_min, - const int32_t activation_max); - -/** - * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in padded cases where - * the padding is -lhs_offset(Range: int8). Dimensions are the same for lhs and rhs. - * - * @param[in] lhs Input left-hand side matrix - * @param[in] rhs Input right-hand side matrix (transposed) - * @param[in] lhs_offset LHS matrix offset(input offset). Range: -127 to 128 - * @param[in] num_ch Number of channels in LHS/RHS - * @param[in] out_shift Per channel output shift. Length of vector is equal to number of channels - * @param[in] out_mult Per channel output multiplier. Length of vector is equal to number of channels - * @param[in] out_offset Offset to be added to the output values. Range: -127 to 128 - * @param[in] activation_min Minimum value to clamp the output to. Range: int8 - * @param[in] activation_max Maximum value to clamp the output to. Range: int8 - * @param[in] row_x_col (row_dimension * col_dimension) of LHS/RHS matrix - * @param[in] output_bias Per channel output bias. Length of vector is equal to number of channels - * @param[in] out Output pointer - * - * @return The function returns one of the two - * - Updated output pointer if an implementation is available - * - NULL if no implementation is available. - * - * @note If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read - * out for the following. - * - Output shift - * - Output multiplier - * - Output bias - * - rhs - */ -q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs, - const q7_t *rhs, - const int32_t lhs_offset, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t row_x_col, - const int32_t *const output_bias, - q7_t *out); - -/** - * @brief Depthwise convolution of transposed rhs matrix with 4 lhs matrices. To be used in non-padded cases. - * Dimensions are the same for lhs and rhs. - * - * @param[in] lhs Input left-hand side matrix - * @param[in] rhs Input right-hand side matrix (transposed) - * @param[in] lhs_offset LHS matrix offset(input offset). Range: -127 to 128 - * @param[in] num_ch Number of channels in LHS/RHS - * @param[in] out_shift Per channel output shift. Length of vector is equal to number of channels. - * @param[in] out_mult Per channel output multiplier. Length of vector is equal to number of channels. - * @param[in] out_offset Offset to be added to the output values. Range: -127 to 128 - * @param[in] activation_min Minimum value to clamp the output to. Range: int8 - * @param[in] activation_max Maximum value to clamp the output to. Range: int8 - * @param[in] row_x_col (row_dimension * col_dimension) of LHS/RHS matrix - * @param[in] output_bias Per channel output bias. Length of vector is equal to number of channels. - * @param[in] out Output pointer - * - * @return The function returns one of the two - * - Updated output pointer if an implementation is available - * - NULL if no implementation is available. - * - * @note If number of channels is not a multiple of 4, upto 3 elements outside the boundary will be read - * out for the following. - * - Output shift - * - Output multiplier - * - Output bias - * - rhs - */ -q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs, - const q7_t *rhs, - const int32_t lhs_offset, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t row_x_col, - const int32_t *const output_bias, - q7_t *out); - -/** - *@brief Matrix-multiplication function for convolution with reordered columns - *@param[in] pA pointer to operand A - *@param[in] pInBuffer pointer to operand B, always conssists of 2 vectors - *@param[in] ch_im_out numRow of A - *@param[in] numCol_A numCol of A - *@param[in] bias_shift amount of left-shift for bias - *@param[in] out_shift amount of right-shift for output - *@param[in] bias the bias - *@param[in,out] pOut pointer to output - *@return The function returns the incremented output pointer - * - *@details This function assumes that data in pInBuffer are reordered - */ -q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t *pA, - const q15_t *pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q7_t *pOut); - -/** - @brief Read 2 q15 elements and post increment pointer. - @param[in] in_q15 Pointer to pointer that holds address of input. - @return q31 value - */ -__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2_ia(const q15_t **in_q15) -{ - q31_t val; - - memcpy(&val, *in_q15, 4); - *in_q15 += 2; - - return (val); -} - -/** - @brief Read 4 q7 from q7 pointer and post increment pointer. - @param[in] in_q7 Pointer to pointer that holds address of input. - @return q31 value - */ -__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4_ia(const q7_t **in_q7) -{ - q31_t val; - memcpy(&val, *in_q7, 4); - *in_q7 += 4; - - return (val); -} - -/** - @brief Read 2 q15 from q15 pointer. - @param[in] in_q15 pointer to address of input. - @return q31 value - */ -__STATIC_FORCEINLINE q31_t arm_nn_read_q15x2(const q15_t *in_q15) -{ - q31_t val; - memcpy(&val, in_q15, 4); - - return (val); -} - -/** - @brief Read 4 q7 values. - @param[in] in_q7 pointer to address of input. - @return q31 value - */ -__STATIC_FORCEINLINE q31_t arm_nn_read_q7x4(const q7_t *in_q7) -{ - q31_t val; - memcpy(&val, in_q7, 4); - - return (val); -} - -/** - @brief Write four q7 to q7 pointer and increment pointer afterwards. - @param[in] in Double pointer to input value - @param[in] value Four bytes to copy - */ -__STATIC_FORCEINLINE void arm_nn_write_q7x4_ia(q7_t **in, q31_t value) -{ - memcpy(*in, &value, 4); - *in += 4; -} - -/** - * @brief memset optimized for MVE - * @param[in, out] dst Destination pointer - * @param[in] val Value to set - * @param[in] block_size Number of bytes to copy. - * - */ -__STATIC_FORCEINLINE void arm_memset_q7(q7_t *dst, const q7_t val, uint32_t block_size) -{ -#if defined(ARM_MATH_MVEI) - __asm volatile(" vdup.8 q0, %[set_val] \n" - " wlstp.8 lr, %[cnt], 1f \n" - "2: \n" - " vstrb.8 q0, [%[in]], #16 \n" - " letp lr, 2b \n" - "1: \n" - : [ in ] "+r"(dst) - : [ cnt ] "r"(block_size), [ set_val ] "r"(val) - : "q0", "memory", "r14"); -#else - memset(dst, val, block_size); -#endif -} - -#if defined(ARM_MATH_DSP) - -/** - * @brief read and expand one q7 word into two q15 words - */ - -__STATIC_FORCEINLINE const q7_t *read_and_pad(const q7_t *source, q31_t *out1, q31_t *out2) -{ - q31_t inA = arm_nn_read_q7x4_ia(&source); - q31_t inAbuf1 = __SXTB16_RORn((uint32_t)inA, 8); - q31_t inAbuf2 = __SXTB16(inA); - -#ifndef ARM_MATH_BIG_ENDIAN - *out2 = (int32_t)(__PKHTB(inAbuf1, inAbuf2, 16)); - *out1 = (int32_t)(__PKHBT(inAbuf2, inAbuf1, 16)); -#else - *out1 = (int32_t)(__PKHTB(inAbuf1, inAbuf2, 16)); - *out2 = (int32_t)(__PKHBT(inAbuf2, inAbuf1, 16)); -#endif - - return source; -} - -/** - * @brief read and expand one q7 word into two q15 words with reordering - */ - -__STATIC_FORCEINLINE const q7_t *read_and_pad_reordered(const q7_t *source, q31_t *out1, q31_t *out2) -{ - q31_t inA = arm_nn_read_q7x4_ia(&source); -#ifndef ARM_MATH_BIG_ENDIAN - *out2 = __SXTB16(__ROR((uint32_t)inA, 8)); - *out1 = __SXTB16(inA); -#else - *out1 = __SXTB16(__ROR((uint32_t)inA, 8)); - *out2 = __SXTB16(inA); -#endif - - return source; -} - -/** - * @brief read and expand one q7 word into two q15 words with reordering and add an offset - */ -__STATIC_FORCEINLINE const q7_t * -read_and_pad_reordered_with_offset(const q7_t *source, q31_t *out1, q31_t *out2, q31_t offset) -{ - q31_t inA = arm_nn_read_q7x4_ia(&source); - -#ifndef ARM_MATH_BIG_ENDIAN - *out2 = __SXTB16(__ROR((uint32_t)inA, 8)); - *out1 = __SXTB16(inA); -#else - *out1 = __SXTB16(__ROR((uint32_t)inA, 8)); - *out2 = __SXTB16(inA); -#endif - *out1 = __QADD16(*out1, offset); - *out2 = __QADD16(*out2, offset); - - return source; -} - -#endif - -/** - * @defgroup NNBasicMath Basic Math Functions for Neural Network Computation - * - * Basic Math Functions for Neural Network Computation - * - */ - -/** - * @brief q7 vector multiplication with variable output shifts - * @param[in] *pSrcA pointer to the first input vector - * @param[in] *pSrcB pointer to the second input vector - * @param[out] *pDst pointer to the output vector - * @param[in] out_shift amount of right-shift for output - * @param[in] blockSize number of samples in each vector - * @return none. - * - * Scaling and Overflow Behavior: - * \par - * The function uses saturating arithmetic. - * Results outside of the allowable q15 range [0x8000 0x7FFF] will be saturated. - */ - -void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize); - -/** - * @brief q7 vector multiplication with variable output shifts - * @param[in] *pSrcA pointer to the first input vector - * @param[in] *pSrcB pointer to the second input vector - * @param[out] *pDst pointer to the output vector - * @param[in] out_shift amount of right-shift for output - * @param[in] blockSize number of samples in each vector - * @return none. - * - * Scaling and Overflow Behavior: - * \par - * The function uses saturating arithmetic. - * Results outside of the allowable q7 range [0x80 0x7F] will be saturated. - */ - -void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize); - -/** - * @brief Matrix-multiplication function for convolution with per-channel requantization. - * @param[in] input_a pointer to operand A - * @param[in] input_b pointer to operand B, always consists of 2 vectors. - * @param[in] output_ch number of rows of A - * @param[in] out_shift pointer to per output channel requantization shift parameter. - * @param[in] out_mult pointer to per output channel requantization multiplier parameter. - * @param[in] out_offset output tensor offset. - * @param[in] activation_min minimum value to clamp the output to. Range : int8 - * @param[in] activation_max maximum value to clamp the output to. Range : int8 - * @param[in] num_col_a number of columns of A - * @param[in] output_bias per output channel bias. Range : int32 - * @param[in,out] out_0 pointer to output - * @return The function returns one of the two - * 1. The incremented output pointer for a successful operation or - * 2. NULL if implementation is not available. - * - * @details This function does the matrix multiplication of weight matrix for all output channels - * with 2 columns from im2col and produces two elements/output_channel. The outputs are - * clamped in the range provided by activation min and max. - * Supported framework: TensorFlow Lite micro. - */ -q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a, - const q15_t *input_b, - const uint16_t output_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int16_t activation_min, - const int16_t activation_max, - const uint16_t num_col_a, - const int32_t *const output_bias, - q7_t *out_0); - -/** - * @brief Common softmax function for s8 input and s8 or s16 output - * @param[in] input Pointer to the input tensor - * @param[in] num_rows Number of rows in the input tensor - * @param[in] row_size Number of elements in each input row - * @param[in] mult Input quantization multiplier - * @param[in] shift Input quantization shift within the range [0, 31] - * @param[in] diff_min Minimum difference with max in row. Used to check if - * the quantized exponential operation can be performed - * @param[in] int16_output Indicating s8 output if 0 else s16 output - * @param[out] output Pointer to the output tensor - * - * @note Supported framework: TensorFlow Lite micro (bit-accurate) - * - */ -void arm_nn_softmax_common_s8(const int8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - const bool int16_output, - void *output); - -/** - * @brief macro for adding rounding offset - */ -#ifndef ARM_NN_TRUNCATE -#define NN_ROUND(out_shift) ((0x1 << out_shift) >> 1) -#else -#define NN_ROUND(out_shift) 0 -#endif - -// Macros for shortening quantization functions' names and avoid long lines -#define MUL_SAT(a, b) arm_nn_doubling_high_mult((a), (b)) -#define MUL_SAT_MVE(a, b) arm_doubling_high_mult_mve_32x4((a), (b)) -#define MUL_POW2(a, b) arm_nn_mult_by_power_of_two((a), (b)) - -#define DIV_POW2(a, b) arm_nn_divide_by_power_of_two((a), (b)) -#define DIV_POW2_MVE(a, b) arm_divide_by_power_of_two_mve((a), (b)) - -#define EXP_ON_NEG(x) arm_nn_exp_on_negative_values((x)) -#define ONE_OVER1(x) arm_nn_one_over_one_plus_x_for_x_in_0_1((x)) - -/** - * @brief Saturating doubling high multiply. Result matches - * NEON instruction VQRDMULH. - * @param[in] m1 Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX} - * @param[in] m2 Multiplier. Range: {NN_Q31_MIN, NN_Q31_MAX} - * @return Result of multiplication. - * - */ -__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult(const q31_t m1, const q31_t m2) -{ - q31_t result = 0; - // Rounding offset to add for a right shift of 31 - q63_t mult = 1 << 30; - - if ((m1 < 0) ^ (m2 < 0)) - { - mult = 1 - mult; - } - // Gets resolved as a SMLAL instruction - mult = mult + (q63_t)m1 * m2; - - // Utilize all of the upper 32 bits. This is the doubling step - // as well. - result = (int32_t)(mult / (1ll << 31)); - - if ((m1 == m2) && (m1 == (int32_t)NN_Q31_MIN)) - { - result = NN_Q31_MAX; - } - return result; -} - -/** - * @brief Doubling high multiply without saturation. This is intended - * for requantization where the scale is a positive integer - * - * @param[in] m1 Multiplicand. Range: {NN_Q31_MIN, NN_Q31_MAX} - * @param[in] m2 Multiplier Range: {NN_Q31_MIN, NN_Q31_MAX} - * @return Result of multiplication. - * @note The result of this matches that of neon instruction - * VQRDMULH for m1 in range {NN_Q31_MIN, NN_Q31_MAX} and m2 in - * range {NN_Q31_MIN + 1, NN_Q31_MAX}. Saturation occurs when - * m1 equals m2 equals NN_Q31_MIN and that is not handled by - * this function. - * - */ -__STATIC_FORCEINLINE q31_t arm_nn_doubling_high_mult_no_sat(const q31_t m1, const q31_t m2) -{ - q31_t result = 0; - union arm_nn_long_long mult; - - // Rounding offset to add for a right shift of 31 - mult.word.low = 1 << 30; - mult.word.high = 0; - - // Gets resolved as a SMLAL instruction - mult.long_long = mult.long_long + (q63_t)m1 * m2; - - // Utilize all of the upper 32 bits. This is the doubling step - // as well. - result = (int32_t)(mult.long_long >> 31); - - return result; -} - -/** - * @brief Rounding divide by power of two. - * @param[in] dividend - Dividend - * @param[in] exponent - Divisor = power(2, exponent) - * Range: [0, 31] - * @return Rounded result of division. Midpoint is rounded away from zero. - * - */ -__STATIC_FORCEINLINE q31_t arm_nn_divide_by_power_of_two(const q31_t dividend, const q31_t exponent) -{ - q31_t result = 0; - const q31_t remainder_mask = (1 << exponent) - 1; - int32_t remainder = remainder_mask & dividend; - - // Basic division - result = dividend >> exponent; - - // Adjust 'result' for rounding (mid point away from zero) - q31_t threshold = remainder_mask >> 1; - if (result < 0) - { - threshold++; - } - if (remainder > threshold) - { - result++; - } - - return result; -} - -/** - * @brief Requantize a given value. - * @param[in] val Value to be requantized - * @param[in] multiplier multiplier. Range {NN_Q31_MIN + 1, Q32_MAX} - * @param[in] shift left or right shift for 'val * multiplier' - * - * @return Returns (val * multiplier)/(2 ^ shift) - * - */ -__STATIC_FORCEINLINE q31_t arm_nn_requantize(const q31_t val, const q31_t multiplier, const q31_t shift) -{ -#ifdef CMSIS_NN_USE_SINGLE_ROUNDING - const int64_t total_shift = 31 - shift; - const int64_t new_val = val * (int64_t)multiplier; - - int32_t result = new_val >> (total_shift - 1); - result = (result + 1) >> 1; - - return result; -#else - return arm_nn_divide_by_power_of_two(arm_nn_doubling_high_mult_no_sat(val * (1 << LEFT_SHIFT(shift)), multiplier), - RIGHT_SHIFT(shift)); -#endif -} - -/** - * @brief Requantize a given 64 bit value. - * @param[in] val Value to be requantized in the range {-(1<<47)} to {(1<<47) - 1} - * @param[in] reduced_multiplier Reduced multiplier in the range {NN_Q31_MIN + 1, Q32_MAX} to {Q16_MIN + 1, - * Q16_MAX} - * @param[in] shift Left or right shift for 'val * multiplier' in the range {-31} to {7} - * - * @return Returns (val * multiplier)/(2 ^ shift) - * - */ -__STATIC_FORCEINLINE q31_t arm_nn_requantize_s64(const q63_t val, const q31_t reduced_multiplier, const q31_t shift) -{ - const q63_t new_val = val * reduced_multiplier; - - q31_t result = new_val >> (14 - shift); // 64->32 bit reduction - result = (result + 1) >> 1; // Last shift position and insert round - - return result; -} - -/** - * @brief memcpy optimized for MVE - * @param[in, out] dst Destination pointer - * @param[in] src Source pointer. - * @param[in] block_size Number of bytes to copy. - * - */ -__STATIC_FORCEINLINE void arm_memcpy_q7(q7_t *__RESTRICT dst, const q7_t *__RESTRICT src, uint32_t block_size) -{ -#if defined(ARM_MATH_MVEI) - __asm volatile(" wlstp.8 lr, %[cnt], 1f \n" - "2: \n" - " vldrb.8 q0, [%[in]], #16 \n" - " vstrb.8 q0, [%[out]], #16 \n" - " letp lr, 2b \n" - "1: \n" - : [ in ] "+r"(src), [ out ] "+r"(dst) - : [ cnt ] "r"(block_size) - : "q0", "memory", "r14"); -#else - memcpy(dst, src, block_size); -#endif -} - -#if defined(ARM_MATH_MVEI) -/** - * @brief Vector saturating doubling high multiply returning high half. - * @param[in] m1 Multiplicand - * @param[in] m2 Multiplier - * @return Result of multiplication. - * - */ -__STATIC_FORCEINLINE int32x4_t arm_doubling_high_mult_mve(const int32x4_t m1, const q31_t m2) -{ - return vqrdmulhq_n_s32(m1, m2); -} - -/** - * @brief Vector rounding divide by power of two. - * @param[in] dividend - Dividend vector - * @param[in] exponent - Divisor = power(2, exponent) - * Range: [0, 31] - * @return Rounded result of division. Midpoint is rounded away from zero. - * - */ -__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve(const int32x4_t dividend, const q31_t exponent) -{ - const int32x4_t shift = vdupq_n_s32(-exponent); - const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31); - const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup); - return vrshlq_s32(fixed_up_dividend, shift); -} - -/** - * @brief Requantize a given vector. - * @param[in] val Vector to be requantized - * @param[in] multiplier multiplier - * @param[in] shift shift - * - * @return Returns (val * multiplier)/(2 ^ shift) - * - */ -__STATIC_FORCEINLINE int32x4_t arm_requantize_mve(const int32x4_t val, const q31_t multiplier, const q31_t shift) -{ -#ifdef CMSIS_NN_USE_SINGLE_ROUNDING - const int right_shift = MIN(-1, shift); - const int left_shift = shift - right_shift; - - const int32x4_t left_shift_dup = vdupq_n_s32(left_shift); - const int32x4_t right_shift_dup = vdupq_n_s32(right_shift); - - int32x4_t result = vqdmulhq_n_s32(vshlq_s32(val, left_shift_dup), multiplier); - result = vrshlq_s32(result, right_shift_dup); - - return result; -#else - return arm_divide_by_power_of_two_mve( - arm_doubling_high_mult_mve(vshlq_s32(val, vdupq_n_s32(LEFT_SHIFT(shift))), multiplier), RIGHT_SHIFT(shift)); -#endif -} - -__STATIC_FORCEINLINE int32x4_t arm_doubling_high_mult_mve_32x4(const int32x4_t m1, const int32x4_t m2) -{ - return vqrdmulhq_s32(m1, m2); -} - -__STATIC_FORCEINLINE int32x4_t arm_divide_by_power_of_two_mve_32x4(const int32x4_t dividend, const int32x4_t exponent) -{ - const int32x4_t shift = -exponent; - const int32x4_t fixup = vshrq_n_s32(vandq_s32(dividend, shift), 31); - const int32x4_t fixed_up_dividend = vqaddq_s32(dividend, fixup); - return vrshlq_s32(fixed_up_dividend, shift); -} - -__STATIC_FORCEINLINE int32x4_t arm_requantize_mve_32x4(const int32x4_t val, - const int32x4_t multiplier, - const int32x4_t shift) -{ -#ifdef CMSIS_NN_USE_SINGLE_ROUNDING - const int32x4_t right_shift = vminq_s32(vdupq_n_s32(-1), shift); - const int32x4_t left_shift = vqsubq_s32(shift, right_shift); - - int32x4_t result = vqdmulhq_s32(vshlq_s32(val, left_shift), multiplier); - result = vrshlq_s32(result, right_shift); - - return result; -#else - const int32x4_t zz = vdupq_n_s32(0); - const mve_pred16_t p = vcmpgtq_n_s32(shift, 0); - - const int32x4_t left_shift = vpselq_s32(shift, zz, p); - const int32x4_t right_shift = -vpselq_s32(zz, shift, p); - - return arm_divide_by_power_of_two_mve_32x4(arm_doubling_high_mult_mve_32x4(vshlq_s32(val, left_shift), multiplier), - right_shift); -#endif -} -#endif - -// @note The following functions are used only for softmax layer, scaled bits = 5 assumed - -__STATIC_FORCEINLINE int32_t arm_nn_exp_on_negative_values(int32_t val) -{ - int32_t mask = 0; - int32_t shift = 24; - - const int32_t val_mod_minus_quarter = (val & ((1 << shift) - 1)) - (1 << shift); - const int32_t remainder = val_mod_minus_quarter - val; - const int32_t x = (val_mod_minus_quarter << 5) + (1 << 28); - const int32_t x2 = MUL_SAT(x, x); - - int32_t result = 1895147668 + - MUL_SAT(1895147668, x + DIV_POW2(MUL_SAT(DIV_POW2(MUL_SAT(x2, x2), 2) + MUL_SAT(x2, x), 715827883) + x2, 1)); - -#define SELECT_IF_NON_ZERO(x) \ - { \ - mask = MASK_IF_NON_ZERO(remainder & (1 << shift++)); \ - result = SELECT_USING_MASK(mask, MUL_SAT(result, x), result); \ - } - - SELECT_IF_NON_ZERO(1672461947) - SELECT_IF_NON_ZERO(1302514674) - SELECT_IF_NON_ZERO(790015084) - SELECT_IF_NON_ZERO(290630308) - SELECT_IF_NON_ZERO(39332535) - SELECT_IF_NON_ZERO(720401) - SELECT_IF_NON_ZERO(242) - -#undef SELECT_IF_NON_ZERO - - mask = MASK_IF_ZERO(val); - return SELECT_USING_MASK(mask, NN_Q31_MAX, result); -} - -__STATIC_FORCEINLINE q31_t arm_nn_mult_by_power_of_two(const int32_t val, const int32_t exp) -{ - const int32_t thresh = ((1 << (31 - exp)) - 1); - int32_t result = val << exp; - result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val > thresh), NN_Q31_MAX, result); - result = SELECT_USING_MASK(MASK_IF_NON_ZERO(val < -thresh), NN_Q31_MIN, result); - return result; -} - -__STATIC_FORCEINLINE int32_t arm_nn_one_over_one_plus_x_for_x_in_0_1(int32_t val) -{ - const int64_t sum = (int64_t)val + (int64_t)NN_Q31_MAX; - const int32_t half_denominator = (int32_t)((sum + (sum >= 0 ? 1 : -1)) / 2L); - int32_t x = 1515870810 + MUL_SAT(half_denominator, -1010580540); - - const int32_t shift = (1 << 29); - x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2); - x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2); - x += MUL_POW2(MUL_SAT(x, shift - MUL_SAT(half_denominator, x)), 2); - - return MUL_POW2(x, 1); -} - -/** - @brief Write 2 q15 elements and post increment pointer. - @param[in] dest_q15 Pointer to pointer that holds address of destination. - @param[in] src_q31 Input value to be written. - */ -__STATIC_FORCEINLINE void arm_nn_write_q15x2_ia(q15_t **dest_q15, q31_t src_q31) -{ - q31_t val = src_q31; - - memcpy(*dest_q15, &val, 4); - *dest_q15 += 2; -} - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/CMakeLists.txt deleted file mode 100644 index c53f635..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright (c) 2019-2021 Arm Limited. All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -project(CMSISNNActivation) - -file(GLOB SRC "./*_s8.c") -add_library(CMSISNNActivation STATIC ${SRC}) - -### Includes -target_include_directories(CMSISNNActivation PUBLIC "${NN}/Include") -target_include_directories(CMSISNNActivation PUBLIC "${ROOT}/CMSIS/Core/Include") -target_include_directories(CMSISNNActivation PUBLIC "${ROOT}/CMSIS/DSP/Include") - - - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c deleted file mode 100644 index cb8a08f..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q15.c +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_activations_q15.c - * Description: Q15 neural network activation function using direct table look-up - * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nn_tables.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - -/** - * @brief neural network activation function using direct table look-up - * - * @note Refer header file for details. - * - */ - -void arm_nn_activations_direct_q15(q15_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type) -{ - uint16_t i = size; - q15_t *pIn = data; - q15_t *pOut = data; - uint16_t shift_size = 8 + 3 - int_width; - uint32_t bit_mask = 0x7FF >> int_width; - uint32_t full_frac = bit_mask + 1; - const q15_t *lookup_table; - - switch (type) - { - case ARM_SIGMOID: - lookup_table = sigmoidTable_q15; - break; - case ARM_TANH: - default: - lookup_table = tanhTable_q15; - break; - } - - while (i) - { - q15_t out; - q15_t in = *pIn++; - q15_t frac = (uint32_t)in & bit_mask; - q15_t value = lookup_table[(uint8_t)(in >> shift_size)]; - if ((in >> shift_size) != 0x7f) - { - q15_t value2 = lookup_table[(uint8_t)(1 + ((uint8_t)(in >> shift_size)))]; - /* doing the interpolation here for better accuracy */ - out = ((q31_t)(full_frac - frac) * value + (q31_t)value2 * frac) >> shift_size; - } - else - { - /* the largest positive value does not have a right side for linear interpolation */ - out = value; - } - - *pOut++ = out; - i--; - } -} - -/** - * @} end of Acti group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c deleted file mode 100644 index 72a0b15..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_nn_activations_q7.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_activations_q7.c - * Description: Q7 neural network activation function using direct table look-up - * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nn_tables.h" -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - -/** - * @brief Q7 neural network activation function using direct table look-up - * @param[in,out] data pointer to input - * @param[in] size number of elements - * @param[in] int_width bit-width of the integer part, assume to be smaller than 3 - * @param[in] type type of activation functions - * - * @details - * - * This is the direct table look-up approach. - * - * Assume here the integer part of the fixed-point is <= 3. - * More than 3 just not making much sense, makes no difference with - * saturation followed by any of these activation functions. - */ - -void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type) -{ - uint16_t i = size; - q7_t *pIn = data; - q7_t *pOut = data; - q7_t in; - q7_t out; - uint16_t shift_size = 3 - int_width; - const q7_t *lookup_table; - switch (type) - { - case ARM_SIGMOID: - lookup_table = sigmoidTable_q7; - break; - case ARM_TANH: - default: - lookup_table = tanhTable_q7; - break; - } - while (i) - { - in = *pIn++; - out = lookup_table[(uint8_t)(in >> shift_size)]; - *pOut++ = out; - i--; - } -} - -/** - * @} end of Acti group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c deleted file mode 100644 index a460b30..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu6_s8.c +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_relu6_s8.c - * Description: Basic s8 version of ReLU6 - * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - -/* - * Basic ReLU6 function - * - * Refer to header file for details. - * - */ - -void arm_relu6_s8(q7_t *data, uint16_t size) -{ - int32_t i; - - for (i = 0; i < size; i++) - { - int32_t ip = data[i]; - - ip = MAX(ip, 0); - data[i] = MIN(ip, 6); - } -} - -/** - * @} end of Acti group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c deleted file mode 100644 index d62117c..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q15.c +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_relu_q15.c - * Description: Q15 version of ReLU - * - * $Date: 09. October 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - -/** - * @brief Q15 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * - * @details - * - * Optimized relu with QSUB instructions. - * - */ - -void arm_relu_q15(q15_t *data, uint16_t size) -{ - -#if defined(ARM_MATH_DSP) - /* Run the following code for M cores with DSP extension */ - - uint16_t i = size >> 1; - q15_t *input = data; - q15_t *output = data; - q31_t in; - q31_t buf; - q31_t mask; - - while (i) - { - in = read_q15x2_ia(&input); - - /* extract the first bit */ - buf = __ROR(in & 0x80008000, 15); - - /* if MSB=1, mask will be 0xFF, 0x0 otherwise */ - mask = __QSUB16(0x00000000, buf); - - arm_nn_write_q15x2_ia(&output, in & (~mask)); - i--; - } - - if (size & 0x1) - { - if (*input < 0) - { - *input = 0; - } - input++; - } -#else - /* Run the following code as reference implementation for M cores without DSP extension */ - uint16_t i; - - for (i = 0; i < size; i++) - { - if (data[i] < 0) - data[i] = 0; - } - -#endif /* ARM_MATH_DSP */ -} - -/** - * @} end of Acti group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c deleted file mode 100644 index a3163cd..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ActivationFunctions/arm_relu_q7.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_relu_q7.c - * Description: Q7 version of ReLU - * - * $Date: 20. July 2021 - * $Revision: V.1.1.3 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Acti - * @{ - */ - -/** - * @brief Q7 RELU function - * @param[in,out] data pointer to input - * @param[in] size number of elements - * - * @details - * - * Optimized relu with QSUB instructions. - * - */ - -void arm_relu_q7(q7_t *data, uint16_t size) -{ - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for M cores with DSP extension */ - - uint16_t i = size >> 2; - q7_t *input = data; - q7_t *output = data; - q31_t in; - q31_t buf; - q31_t mask; - - while (i) - { - in = arm_nn_read_q7x4_ia((const q7_t **)&input); - - /* extract the first bit */ - buf = (int32_t)__ROR((uint32_t)in & 0x80808080, 7); - - /* if MSB=1, mask will be 0xFF, 0x0 otherwise */ - mask = __QSUB8(0x00000000, buf); - - arm_nn_write_q7x4_ia(&output, in & (~mask)); - - i--; - } - - i = size & 0x3; - while (i) - { - if (*input < 0) - { - *input = 0; - } - input++; - i--; - } - -#else - /* Run the following code as reference implementation for cores without DSP extension */ - - uint16_t i; - - for (i = 0; i < size; i++) - { - if (data[i] < 0) - data[i] = 0; - } - -#endif -} - -/** - * @} end of Acti group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/CMakeLists.txt deleted file mode 100644 index fcd7a19..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (c) 2019-2021 Arm Limited. All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -project(CMSISNNBasicMaths) - -file(GLOB SRC "./*_*.c") - -add_library(CMSISNNBasicMaths STATIC ${SRC}) - -### Includes -target_include_directories(CMSISNNBasicMaths PUBLIC "${NN}/Include") -target_include_directories(CMSISNNBasicMaths PUBLIC "${ROOT}/CMSIS/Core/Include") -target_include_directories(CMSISNNBasicMaths PUBLIC "${ROOT}/CMSIS/DSP/Include") - - - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c deleted file mode 100644 index 6b1366d..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s16.c +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Copyright (C) 2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_elementwise_add_s16 - * Description: Elementwise add - * - * $Date: 14 Februari 2022 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup BasicMath - * @{ - */ - -/* - * s16 elementwise add - * - * Refer header file for details. - * - */ - -/* Note: __SHIFT is expected to be <=0 */ - -arm_status arm_elementwise_add_s16(const int16_t *input_1_vect, - const int16_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_1_mult, - const int32_t input_1_shift, - const int32_t input_2_offset, - const int32_t input_2_mult, - const int32_t input_2_shift, - const int32_t left_shift, - int16_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const int32_t block_size) -{ - (void)input_1_offset; - (void)input_2_offset; - (void)out_offset; - int32_t loop_count; - int32_t input_1; - int32_t input_2; - int32_t sum; - - loop_count = block_size; - - while (loop_count > 0) - { - /* C = A + B */ - input_1 = *input_1_vect++ << left_shift; - input_2 = *input_2_vect++ << left_shift; - - input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift); - input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift); - - sum = input_1 + input_2; - sum = arm_nn_requantize(sum, out_mult, out_shift); - - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - - *output++ = (int16_t)sum; - - /* Decrement loop counter */ - loop_count--; - } - - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of BasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c deleted file mode 100644 index 85740ed..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_add_s8.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_elementwise_add_s8 - * Description: Element wise add - * - * $Date: 01. March 2021 - * $Revision: V.2.5.3 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" -#if defined(ARM_MATH_MVEI) -#include "arm_helium_utils.h" -#endif - -#if defined(ARM_MATH_MVEI) -#define SAT_INPUT_VECT(__INPUT_V, __MULT, __SHIFT) \ - __INPUT_V = arm_doubling_high_mult_mve(__INPUT_V, __MULT); \ - __INPUT_V = arm_divide_by_power_of_two_mve(__INPUT_V, -__SHIFT); -#endif - -/** - * @note The *_no_sat API does not mean that the input not saturated, Since - * __MULT is a positive integer, it is saturated. The API definition - * has more info about it. - */ -#define SAT_INPUT(__INPUT, __MULT, __SHIFT) \ - __INPUT = arm_nn_doubling_high_mult_no_sat(__INPUT, __MULT); \ - __INPUT = arm_nn_divide_by_power_of_two(__INPUT, -__SHIFT); - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup BasicMath - * @{ - */ - -/* - * s8 element wise add - * - * Refer header file for details. - * - */ - -/* Note: __SHIFT is expected to be <=0 */ - -arm_status arm_elementwise_add_s8(const int8_t *input_1_vect, - const int8_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_1_mult, - const int32_t input_1_shift, - const int32_t input_2_offset, - const int32_t input_2_mult, - const int32_t input_2_shift, - const int32_t left_shift, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const uint32_t block_size) -{ -#if defined(ARM_MATH_MVEI) - int32_t count = (int32_t)block_size; - - while (count > 0) - { - int32x4_t vect_1; - int32x4_t vect_2; - - mve_pred16_t p = vctp32q((uint32_t)count); - - vect_1 = vldrbq_z_s32(input_1_vect, p); - vect_2 = vldrbq_z_s32(input_2_vect, p); - - vect_1 = vaddq_s32(vect_1, vdupq_n_s32(input_1_offset)); - vect_2 = vaddq_s32(vect_2, vdupq_n_s32(input_2_offset)); - - vect_1 = vshlq_r_s32(vect_1, left_shift); - vect_2 = vshlq_r_s32(vect_2, left_shift); - - SAT_INPUT_VECT(vect_1, input_1_mult, input_1_shift); - SAT_INPUT_VECT(vect_2, input_2_mult, input_2_shift); - - vect_1 = vaddq_s32(vect_1, vect_2); - SAT_INPUT_VECT(vect_1, out_mult, out_shift); - - vect_1 = vaddq_n_s32(vect_1, out_offset); - - vect_1 = vmaxq_s32(vect_1, vdupq_n_s32(out_activation_min)); - vect_1 = vminq_s32(vect_1, vdupq_n_s32(out_activation_max)); - - input_1_vect += 4; - input_2_vect += 4; - vstrbq_p_s32(output, vect_1, p); - - output += 4; - count -= 4; - } -#else - uint32_t loop_count; - int32_t input_1; - int32_t input_2; - int32_t sum; - -#if defined(ARM_MATH_DSP) - int32_t a_1, b_1, a_2, b_2; - - int32_t offset_1_packed, offset_2_packed; - - int8_t r1, r2, r3, r4; - - offset_1_packed = (input_1_offset << 16U) | (input_1_offset & 0x0FFFFL); - offset_2_packed = (input_2_offset << 16U) | (input_2_offset & 0x0FFFFL); - - loop_count = block_size >> 2; - - while (loop_count > 0U) - { - /* 4 outputs are calculated in one loop. The order of calculation is follows the order of output sign extension - intrinsic */ - input_1_vect = read_and_pad_reordered(input_1_vect, &b_1, &a_1); - input_2_vect = read_and_pad_reordered(input_2_vect, &b_2, &a_2); - - a_1 = __SADD16(a_1, offset_1_packed); - b_1 = __SADD16(b_1, offset_1_packed); - - a_2 = __SADD16(a_2, offset_2_packed); - b_2 = __SADD16(b_2, offset_2_packed); - - /* Sum 1 */ - input_1 = (b_1 & 0x0FFFF) << left_shift; - - SAT_INPUT(input_1, input_1_mult, input_1_shift); - - input_2 = (b_2 & 0x0FFFF) << left_shift; - SAT_INPUT(input_2, input_2_mult, input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - r1 = (q7_t)sum; - - /* Sum 3 */ - input_1 = ((b_1 >> 16) & 0x0FFFF) << left_shift; - SAT_INPUT(input_1, input_1_mult, input_1_shift); - - input_2 = ((b_2 >> 16) & 0x0FFFF) << left_shift; - SAT_INPUT(input_2, input_2_mult, input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - r3 = (q7_t)sum; - - /* Sum 2 */ - input_1 = (a_1 & 0x0FFFF) << left_shift; - SAT_INPUT(input_1, input_1_mult, input_1_shift); - - input_2 = (a_2 & 0x0FFFF) << left_shift; - SAT_INPUT(input_2, input_2_mult, input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - r2 = (q7_t)sum; - - /* Sum 4 */ - input_1 = ((a_1 >> 16) & 0x0FFFF) << left_shift; - SAT_INPUT(input_1, input_1_mult, input_1_shift); - - input_2 = ((a_2 >> 16) & 0x0FFFF) << left_shift; - SAT_INPUT(input_2, input_2_mult, input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - r4 = (q7_t)sum; - - write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4)); - - loop_count--; - } - - loop_count = block_size & 0x3; -#else - loop_count = block_size; -#endif - - while (loop_count > 0U) - { - /* C = A + B */ - - input_1 = (*input_1_vect++ + input_1_offset) << left_shift; - input_2 = (*input_2_vect++ + input_2_offset) << left_shift; - - input_1 = arm_nn_doubling_high_mult(input_1, input_1_mult); - input_1 = arm_nn_divide_by_power_of_two(input_1, -input_1_shift); - - input_2 = arm_nn_doubling_high_mult(input_2, input_2_mult); - input_2 = arm_nn_divide_by_power_of_two(input_2, -input_2_shift); - - sum = input_1 + input_2; - SAT_INPUT(sum, out_mult, out_shift); - sum += out_offset; - - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - - *output++ = (q7_t)sum; - - /* Decrement loop counter */ - loop_count--; - } - -#endif /* ARM_MATH_MVEI */ - - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of BasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c deleted file mode 100644 index 4e25574..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s16.c +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (C) 2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_elementwise_mul_s16 - * Description: Element wise multiplication - * - * $Date: 14 Februari 2022 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup BasicMath - * @{ - */ - -/** - * @brief s16 element wise multiplication of two vectors - * - * @note Refer header file for details. - * - */ -arm_status arm_elementwise_mul_s16(const int16_t *input_1_vect, - const int16_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_2_offset, - int16_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const int32_t block_size) -{ - (void)input_1_offset; - (void)input_2_offset; - (void)out_offset; - int32_t loop_count; - int32_t input_1; - int32_t input_2; - int32_t mul_res; - - loop_count = block_size; - - while (loop_count > 0) - { - /* C = A * B */ - - input_1 = *input_1_vect++; - input_2 = *input_2_vect++; - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift); - - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - - *output++ = (int16_t)mul_res; - - /* Decrement loop counter */ - loop_count--; - } - - return ARM_MATH_SUCCESS; -} - -/** - * @} end of BasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c deleted file mode 100644 index 7c560fe..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/BasicMathFunctions/arm_elementwise_mul_s8.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_elementwise_mul_s8 - * Description: Element wise multiplication - * - * $Date: January 26, 2021 - * $Revision: V.1.0.5 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup BasicMath - * @{ - */ - -/** - * @brief s8 element wise multiplication of two vectors - * - * @note Refer header file for details. - * - */ - -arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect, - const int8_t *input_2_vect, - const int32_t input_1_offset, - const int32_t input_2_offset, - int8_t *output, - const int32_t out_offset, - const int32_t out_mult, - const int32_t out_shift, - const int32_t out_activation_min, - const int32_t out_activation_max, - const uint32_t block_size) -{ - - int32_t loop_count; -#if defined(ARM_MATH_MVEI) - - loop_count = (block_size + 3) / 4; - uint32_t num_elements = block_size; - - for (int i = 0; i < loop_count; i++) - { - mve_pred16_t p = vctp32q(num_elements); - - int32x4_t input_1 = vldrbq_z_s32(input_1_vect, p); - input_1 = vaddq_n_s32(input_1, input_1_offset); - - int32x4_t input_2 = vldrbq_z_s32(input_2_vect, p); - input_2 = vaddq_n_s32(input_2, input_2_offset); - - int32x4_t res_0 = vmulq_s32(input_1, input_2); - - res_0 = arm_requantize_mve_32x4(res_0, vdupq_n_s32(out_mult), vdupq_n_s32(out_shift)); - - res_0 += vdupq_n_s32(out_offset); - - res_0 = vmaxq_s32(res_0, vdupq_n_s32(out_activation_min)); - res_0 = vminq_s32(res_0, vdupq_n_s32(out_activation_max)); - - vstrbq_p_s32(output, res_0, p); - input_1_vect += 4; - input_2_vect += 4; - output += 4; - num_elements -= 4; - } - -#else - int32_t input_1; - int32_t input_2; - int32_t mul_res; - -#if defined(ARM_MATH_DSP) - int32_t a_1, b_1, a_2, b_2; - - int32_t offset_1_packed, offset_2_packed; - - int8_t r1, r2, r3, r4; - - offset_1_packed = (input_1_offset << 16U) | (input_1_offset & 0x0FFFFL); - offset_2_packed = (input_2_offset << 16U) | (input_2_offset & 0x0FFFFL); - - loop_count = block_size >> 2; - - while (loop_count > 0) - { - /* 4 outputs are calculated in one loop. The order of calculation is follows the order of output sign extension - intrinsic */ - input_1_vect = read_and_pad_reordered(input_1_vect, &b_1, &a_1); - input_2_vect = read_and_pad_reordered(input_2_vect, &b_2, &a_2); - - a_1 = __SADD16(a_1, offset_1_packed); - b_1 = __SADD16(b_1, offset_1_packed); - - a_2 = __SADD16(a_2, offset_2_packed); - b_2 = __SADD16(b_2, offset_2_packed); - - /* Mul 1 */ - input_1 = (int16_t)(b_1 & 0x0FFFFL); - input_2 = (int16_t)(b_2 & 0x0FFFFL); - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - r1 = (q7_t)mul_res; - - /* Mul 3 */ - input_1 = (int16_t)((b_1 >> 16U) & 0x0FFFFL); - input_2 = (int16_t)((b_2 >> 16U) & 0x0FFFFL); - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - r3 = (q7_t)mul_res; - - /* Mul 2 */ - input_1 = (int16_t)(a_1 & 0x0FFFFL); - input_2 = (int16_t)(a_2 & 0x0FFFFL); - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - r2 = (q7_t)mul_res; - - /* Mul 4 */ - input_1 = (int16_t)((a_1 >> 16U) & 0x0FFFFL); - input_2 = (int16_t)((a_2 >> 16U) & 0x0FFFFL); - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - r4 = (q7_t)mul_res; - - write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4)); - - loop_count--; - } - - loop_count = block_size & 0x3; -#else - loop_count = block_size; -#endif - - while (loop_count > 0) - { - /* C = A * B */ - - input_1 = *input_1_vect++ + input_1_offset; - input_2 = *input_2_vect++ + input_2_offset; - - mul_res = input_1 * input_2; - mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset; - - mul_res = MAX(mul_res, out_activation_min); - mul_res = MIN(mul_res, out_activation_max); - - *output++ = (q7_t)mul_res; - - /* Decrement loop counter */ - loop_count--; - } -#endif - return ARM_MATH_SUCCESS; -} - -/** - * @} end of BasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/CMakeLists.txt deleted file mode 100644 index 4150df3..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/CMakeLists.txt +++ /dev/null @@ -1,98 +0,0 @@ -# -# Copyright (c) 2019-2021 Arm Limited. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -SET(ROOT ${CMSIS_PATH}) - -# Select which parts of the CMSIS-DSP must be compiled. -# There are some dependencies between the parts but they are not tracked -# by this cmake. So, enabling some functions may require to enable some -# other ones. -option(CONCATENATION "Concatenation" ON) -option(FULLYCONNECTED "Fully Connected" ON) -option(CONVOLUTION "Convolutions" ON) -option(ACTIVATION "Activations" ON) -option(POOLING "Pooling" ON) -option(SOFTMAX "Softmax" ON) -option(BASICMATHSNN "Basic Maths for NN" ON) -option(RESHAPE "Reshape" ON) -option(SVDF "SVDF" ON) - -# When OFF it is the default behavior : all tables are included. -option(NNSUPPORT "NN Support" ON) - - -########################### -# -# CMSIS NN -# -########################### - -# NN Sources -SET(NN ${ROOT}/CMSIS/NN) - -list(APPEND CMAKE_MODULE_PATH ${NN}/Source) - -add_library(cmsis-nn STATIC) - -target_compile_options(cmsis-nn PRIVATE -Ofast) - -### Includes -target_include_directories(cmsis-nn PUBLIC "${NN}/Include") -target_include_directories(cmsis-nn PUBLIC "${ROOT}/CMSIS/Core/Include") -target_include_directories(cmsis-nn PUBLIC "${ROOT}/CMSIS/DSP/Include") - -if (BASICMATHSNN) - add_subdirectory(BasicMathFunctions) -endif() - -if (CONCATENATION) - add_subdirectory(ConcatenationFunctions) -endif() - -if (FULLYCONNECTED) - add_subdirectory(FullyConnectedFunctions) -endif() - -if (CONVOLUTION) - add_subdirectory(ConvolutionFunctions) -endif() - -if (ACTIVATION) - add_subdirectory(ActivationFunctions) -endif() - -if (POOLING) - add_subdirectory(PoolingFunctions) -endif() - -if (SOFTMAX) - add_subdirectory(SoftmaxFunctions) -endif() - -if (SVDF) - add_subdirectory(SVDFunctions) -endif() - -if (RESHAPE) - add_subdirectory(ReshapeFunctions) -endif() - -# Keep NNSUPPORT at the end -if (NNSUPPORT) - add_subdirectory(NNSupportFunctions) -endif() diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/CMakeLists.txt deleted file mode 100644 index 9d3f543..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/CMakeLists.txt +++ /dev/null @@ -1,20 +0,0 @@ -# -# Copyright (c) 2019-2021 Arm Limited. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -file(GLOB SRC "./*_*.c") -target_sources(cmsis-nn PRIVATE ${SRC}) diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c deleted file mode 100644 index 257e6a6..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_w.c +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_concatenation_s8_w.c - * Description: s8 version of concatenation along the W axis - * - * $Date: October 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Concatenation - * @{ - */ - -/* - * s8 version of concatenation along the W axis - * - * Refer to header file for details. - * - */ -void arm_concatenation_s8_w(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint32_t offset_w) -{ - const uint32_t input_copy_size = input_x * input_y * input_z * input_w; - - output += offset_w * (input_x * input_y * input_z); - - arm_memcpy_q7(output, input, input_copy_size); -} - -/** - * @} end of Concatenation group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c deleted file mode 100644 index 7e8487a..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_x.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_concatenation_s8_x.c - * Description: s8 version of concatenation along the X axis - * - * $Date: October 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Concatenation - * @{ - */ - -/* - * s8 version of concatenation along the X axis - * - * Refer to header file for details. - * - */ -void arm_concatenation_s8_x(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_x, - const uint32_t offset_x) -{ - const uint32_t num_iterations = input_y * input_z * input_w; - - output += offset_x; - - uint32_t i; - - // Copy per row - for (i = 0; i < num_iterations; ++i) - { - arm_memcpy_q7(output, input, input_x); - input += input_x; - output += output_x; - } -} - -/** - * @} end of Concatenation group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c deleted file mode 100644 index 075a702..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_y.c +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_concatenation_s8_y.c - * Description: s8 version of concatenation along the Y axis - * - * $Date: October 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Concatenation - * @{ - */ - -/* - * s8 version of concatenation along the Y axis - * - * Refer to header file for details. - * - */ -void arm_concatenation_s8_y(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_y, - const uint32_t offset_y) -{ - const uint32_t num_iterations = input_z * input_w; - const uint32_t input_copy_size = input_x * input_y; - const uint32_t output_stride = input_x * output_y; - - output += offset_y * input_x; - uint32_t i; - - // Copy per tile - for (i = 0; i < num_iterations; ++i) - { - arm_memcpy_q7(output, input, input_copy_size); - input += input_copy_size; - output += output_stride; - } -} - -/** - * @} end of Concatenation group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c deleted file mode 100644 index 3bd84f2..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConcatenationFunctions/arm_concatenation_s8_z.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_concatenation_s8_z.c - * Description: s8 version of concatenation along the Z axis - * - * $Date: October 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Concatenation - * @{ - */ - -/* - * s8 version of concatenation along the Z axis - * - * Refer to header file for details. - * - */ -void arm_concatenation_s8_z(const int8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_z, - const uint16_t input_w, - int8_t *output, - const uint16_t output_z, - const uint32_t offset_z) -{ - const uint32_t input_copy_size = input_x * input_y * input_z; - const uint32_t output_stride = input_x * input_y * output_z; - - output += offset_z * (input_x * input_y); - - uint32_t i; - - for (i = 0; i < input_w; ++i) - { - arm_memcpy_q7(output, input, input_copy_size); - input += input_copy_size; - output += output_stride; - } -} - -/** - * @} end of Concatenation group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/CMakeLists.txt deleted file mode 100644 index 30be0fe..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -# -# Copyright (c) 2019-2022 Arm Limited. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -file(GLOB SRC "./*_s8*.c") -file(GLOB SRC_S16 "./*_s16*.c") -target_sources(cmsis-nn PRIVATE ${SRC} ${SRC_S16}) - - - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c deleted file mode 100644 index a3edd40..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1_x_n_s8.c +++ /dev/null @@ -1,205 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_1_x_n_s8.c - * Description: s8 version of 1xN convolution using symmetric quantization. - * - * $Date: December 14, 2021 - * $Revision: V.2.1.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * 1xN s8 convolution function. - * - * Refer header file for details. - * - */ - -arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data) -{ - (void)bias_dims; - arm_status status = ARM_MATH_SUCCESS; - if (output_dims->w % 4 != 0) - { - status = ARM_MATH_SIZE_MISMATCH; - goto out; - } - -#if defined(ARM_MATH_MVEI) - (void)ctx; - - const uint16_t input_x = input_dims->w; - const uint16_t kernel_x = filter_dims->w; - const uint16_t output_x = output_dims->w; - const uint16_t output_ch = output_dims->c; - const uint16_t input_ch = input_dims->c; - const uint16_t pad_x = conv_params->padding.w; - const uint16_t stride_x = conv_params->stride.w; - - const int32_t input_offset = conv_params->input_offset; - const int32_t out_offset = conv_params->output_offset; - const int32_t out_activation_min = conv_params->activation.min; - const int32_t out_activation_max = conv_params->activation.max; - int32_t *output_mult = quant_params->multiplier; - int32_t *output_shift = quant_params->shift; - - for (int i_out_x = 0; i_out_x <= (output_x - 4); i_out_x += 4) - { - int32_t input_begin_idx[4]; - int32_t ker_begin_idx[4]; - int32_t ker_end_idx[4]; - - for (int i = 0; i < 4; i++) - { - const int32_t est_input_x_idx = stride_x * (i_out_x + i) - pad_x; - input_begin_idx[i] = MAX(0, est_input_x_idx); - ker_begin_idx[i] = MAX(0, -est_input_x_idx); - ker_end_idx[i] = MIN(kernel_x, input_x - est_input_x_idx); - } - - if ((ker_begin_idx[0] != 0) || (ker_end_idx[3] != kernel_x)) - { - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32x4_t s_offset; - int32_t acc[4]; - { - int32_t sum_row[4]; - - (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[0] - ker_begin_idx[0]) * input_ch, - input_data + input_begin_idx[0] * input_ch, - filter_data + (input_ch * kernel_x * i_out_ch) + - (ker_begin_idx[0] * input_ch), - &sum_row[0], - &acc[0]); - (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[1] - ker_begin_idx[1]) * input_ch, - input_data + input_begin_idx[1] * input_ch, - filter_data + (input_ch * kernel_x * i_out_ch) + - (ker_begin_idx[1] * input_ch), - &sum_row[1], - &acc[1]); - - (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[2] - ker_begin_idx[2]) * input_ch, - input_data + input_begin_idx[2] * input_ch, - filter_data + (input_ch * kernel_x * i_out_ch) + - (ker_begin_idx[2] * input_ch), - &sum_row[2], - &acc[2]); - - (void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[3] - ker_begin_idx[3]) * input_ch, - input_data + input_begin_idx[3] * input_ch, - filter_data + (input_ch * kernel_x * i_out_ch) + - (ker_begin_idx[3] * input_ch), - &sum_row[3], - &acc[3]); - - s_offset = vldrwq_s32(sum_row); - } - int32x4_t res = vldrwq_s32(acc); - s_offset = vmulq_n_s32(s_offset, input_offset); - res = vaddq_s32(res, s_offset); - if (bias_data) - { - res = vaddq_n_s32(res, bias_data[i_out_ch]); - } - res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]); - res = vaddq_n_s32(res, out_offset); - - res = vmaxq_s32(res, vdupq_n_s32(out_activation_min)); - res = vminq_s32(res, vdupq_n_s32(out_activation_max)); - - const uint32x4_t scatter_offset = {0, output_ch, output_ch * 2, output_ch * 3}; - vstrbq_scatter_offset_s32(output_data, scatter_offset, res); - output_data++; - } - output_data += (3 * output_ch); - } - else - { - output_data = arm_nn_mat_mul_core_4x_s8(kernel_x * input_ch, - stride_x * input_ch, - input_data + input_begin_idx[0] * input_ch, - filter_data, - output_ch, - conv_params, - quant_params, - bias_data, - output_data); - } - } - -#else - status = arm_convolve_s8(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); -#endif - -out: - /* Return to application */ - return status; -} - -int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) -{ -#if !defined(ARM_MATH_MVEI) - return (2 * input_dims->c * filter_dims->w * filter_dims->h) * sizeof(int16_t); -#else - (void)input_dims; - (void)filter_dims; - return 0; -#endif -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c deleted file mode 100644 index 3db3ba4..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_HWC_q7_fast_nonsquare.c +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_1x1_HWC_q7_fast_nonsquare.c - * Description: Fast Q7 version of 1x1 convolution (non-square shape) - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Fast Q7 version of 1x1 convolution (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1 - * and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise - * separable convolution. - * - * This function is the version with full list of optimization tricks, but with - * some constraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - * - * [1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications - * https://arxiv.org/abs/1704.04861 - */ - -arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - (void)dim_im_in_y; - int16_t i_out_y, i_out_x; - int16_t i_ch_out; - - /* ----------------------- - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0 || dim_kernel_x != 1 || dim_kernel_y != 1 || padding_x != 0 || - padding_y != 0 || stride_x != 1 || stride_y != 1) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - /* This part implements the im2col function */ - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_out_y * dim_im_in_x + i_out_x) * ch_im_in, pBuffer, ch_im_in); - pBuffer += ch_im_in; - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* check if there is left-over for compute */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++) - { - q31_t sum = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift); - const q15_t *pB = bufferA; - /* basically each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel_x * dim_kernel_y >> 2; - - while (colCnt) - { - - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad_reordered(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut = (q7_t)__SSAT((sum >> out_shift), 8); - pOut++; - } - } - -#else - (void)bufferA; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0 || dim_kernel_x != 1 || dim_kernel_y != 1 || padding_x != 0 || - padding_y != 0 || stride_x != 1 || stride_y != 1) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out_y; j++) - { - for (k = 0; k < dim_im_out_x; k++) - { - conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel_y; m++) - { - for (n = 0; n < dim_kernel_x; n++) - { - // if-for implementation - in_row = stride_y * j + m - padding_y; - in_col = stride_x * k + n - padding_x; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_y + n) * ch_im_in + - l]; - } - } - } - } - Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c deleted file mode 100644 index 6183f55..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_1x1_s8_fast.c +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_1x1_s8_fast.c - * Description: Fast q7 version of 1x1 convolution (non-square shape) - * - * $Date: 12. November 2021 - * $Revision: V.2.0.4 - * - * Target Processor: Cortex-M Processors - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" -#include - -#define DIM_KER_X (1U) -#define DIM_KER_Y (1U) - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Fast s8 version for 1x1 convolution (non-square shape) - * - * Refer header file for details. - * - */ - -arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data) -{ - if (input_dims->c % 4 != 0 || conv_params->padding.w != 0 || conv_params->padding.h != 0 || - conv_params->stride.w != 1 || conv_params->stride.h != 1) - { - return ARM_MATH_SIZE_MISMATCH; - } - - (void)ctx; - (void)filter_dims; - (void)bias_dims; - -#if defined(ARM_MATH_MVEI) - - const int32_t col_len = input_dims->w * input_dims->h * input_dims->n; - const int32_t output_ch = output_dims->c; - const int32_t input_ch = input_dims->c; - const int32_t input_offset = conv_params->input_offset; - const int32_t out_offset = conv_params->output_offset; - const int32_t out_activation_min = conv_params->activation.min; - const int32_t out_activation_max = conv_params->activation.max; - int32_t *output_mult = quant_params->multiplier; - int32_t *output_shift = quant_params->shift; - - for (int i_items = 0; i_items <= (col_len - 4); i_items += 4) - { - - output_data = arm_nn_mat_mul_core_4x_s8(input_ch, - input_ch, - input_data + i_items * input_ch, - filter_data, - output_ch, - conv_params, - quant_params, - bias_data, - output_data); - } - - /* Handle left over elements */ - for (int i_items = (col_len & ~0x3); i_items < col_len; i_items++) - { - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32_t sum_row = 0; - int32_t acc; - (void)arm_nn_mat_mul_core_1x_s8( - input_ch, input_data + i_items * input_ch, filter_data + i_out_ch * input_ch, &sum_row, &acc); - if (bias_data) - { - acc += bias_data[i_out_ch]; - } - sum_row = (sum_row * input_offset); - acc += sum_row; - acc = arm_nn_requantize(acc, output_mult[i_out_ch], output_shift[i_out_ch]); - acc += out_offset; - - acc = MAX(acc, out_activation_min); - acc = MIN(acc, out_activation_max); - *output_data++ = acc; - } - } - -#else - /* Run the following code as reference implementation for Cortex-M processors with or without DSP extension */ - - const int32_t lhs_rows = input_dims->w * input_dims->h * input_dims->n; - const int32_t rhs_rows = output_dims->c; - const int32_t rhs_cols = input_dims->c; - - arm_nn_mat_mult_nt_t_s8(input_data, - filter_data, - bias_data, - output_data, - quant_params->multiplier, - quant_params->shift, - lhs_rows, - rhs_rows, - rhs_cols, - conv_params->input_offset, - conv_params->output_offset, - conv_params->activation.min, - conv_params->activation.max); - -#endif - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims) -{ - (void)input_dims; - return 0; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c deleted file mode 100644 index 0a6868a..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_basic.c +++ /dev/null @@ -1,209 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q15_basic.c - * Description: Q15 version of convolution - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Basic Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * bufferA size: ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * This basic version is designed to work for any input tensor and weight - * dimension. - */ - -arm_status arm_convolve_HWC_q15_basic(const q15_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - uint16_t im2col_out_pixel_index = 0; - q15_t *pBuffer = bufferA; - q15_t *pOut = Im_out; - q15_t *im_buffer = bufferA; - const q15_t *pA; - int i; - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* Filling 0 for out-of-bound paddings */ - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - /* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, - * ch_im_in); */ - memcpy(pBuffer, - (q15_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, - sizeof(q15_t) * ch_im_in); - } - pBuffer += ch_im_in; - } - } - - pA = wt; - for (i = 0; i < ch_im_out; i++) - { - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - const q15_t *pB = im_buffer; - uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 2; - while (colCnt) - { - q31_t inA1 = arm_nn_read_q15x2_ia(&pA); - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inA2 = arm_nn_read_q15x2_ia(&pA); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB); - - sum = __SMLAD(inA1, inB1, sum); - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3; - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut = (q15_t)__SSAT((sum >> out_shift), 16); - pOut++; - } - - /* counter reset */ - pBuffer = im_buffer; - im2col_out_pixel_index++; - } - } - -#else - (void)bufferA; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t)__SSAT((conv_out >> out_shift), 16); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c deleted file mode 100644 index 66fbc00..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast.c +++ /dev/null @@ -1,259 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q15_fast.c - * Description: Fast Q15 version of convolution - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Fast Q15 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in is multiple of 2 - * - * ch_im_out is multiple of 2 - * - * dim_im_out is a multiple of 2 - * - */ - -arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q15_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q15_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - q15_t *pBuffer = bufferA; - q15_t *im_buffer = bufferA; - q15_t *pOut = Im_out; - - if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0 || dim_im_out & 0x1) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - /* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, - * ch_im_in); */ - memcpy(pBuffer, - (q15_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, - sizeof(q15_t) * ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (i_out_x & 0x1) - { - int i; - /* initialize the matrix pointers for A */ - const q15_t *pA = wt; - - /* set up the second output pointers */ - q15_t *pOut2 = pOut + ch_im_out; - - /* this loop over rows in A */ - for (i = 0; i < ch_im_out; i += 2) - { - /* setup pointers for B */ - const q15_t *pB = im_buffer; - const q15_t *pB2 = pB + ch_im_in * dim_kernel * dim_kernel; - - /* aling the second pointer for A */ - const q15_t *pA2 = pA + ch_im_in * dim_kernel * dim_kernel; - - /* init the sum with bias */ - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 1; - /* accumulate over the vector */ - while (colCnt) - { - q31_t inA1 = arm_nn_read_q15x2_ia(&pA); - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inA2 = arm_nn_read_q15x2_ia(&pA2); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA1, inB1, sum); - sum2 = __SMLAD(inA1, inB2, sum2); - sum3 = __SMLAD(inA2, inB1, sum3); - sum4 = __SMLAD(inA2, inB2, sum4); - - colCnt--; - } /* while over colCnt */ - colCnt = ch_im_in * dim_kernel * dim_kernel & 0x1; - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inB1 = *pB++; - q15_t inA2 = *pA2++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - sum3 += inA2 * inB1; - sum4 += inA2 * inB2; - colCnt--; - } /* while over colCnt */ - *pOut++ = (q15_t)__SSAT(sum >> out_shift, 16); - *pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16); - *pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16); - *pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16); - - /* skip the row computed with A2 */ - pA += ch_im_in * dim_kernel * dim_kernel; - } /* for over ch_im_out */ - - pOut += ch_im_out; - /* counter reset */ - pBuffer = im_buffer; - } - } - } - -#else - (void)bufferA; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t)__SSAT((conv_out >> out_shift), 16); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c deleted file mode 100644 index 7babe51..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q15_fast_nonsquare.c +++ /dev/null @@ -1,270 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q15_fast.c - * Description: Fast Q15 version of convolution - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Fast Q15 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in is multiple of 2 - * - * ch_im_out is multiple of 2 - * - */ - -arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q15_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q15_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q15_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - q15_t *pBuffer = bufferA; - q15_t *im_buffer = bufferA; - q15_t *pOut = Im_out; - - if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - /* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, - * ch_im_in); */ - memcpy(pBuffer, - (q15_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, - sizeof(q15_t) * ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (i_out_x & 0x1) - { - int i; - /* initialize the matrix pointers for A */ - const q15_t *pA = wt; - - /* set up the second output pointers */ - q15_t *pOut2 = pOut + ch_im_out; - - /* this loop over rows in A */ - for (i = 0; i < ch_im_out; i += 2) - { - /* setup pointers for B */ - const q15_t *pB = im_buffer; - const q15_t *pB2 = pB + ch_im_in * dim_kernel_y * dim_kernel_x; - - /* aling the second pointer for A */ - const q15_t *pA2 = pA + ch_im_in * dim_kernel_y * dim_kernel_x; - - /* init the sum with bias */ - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = ch_im_in * dim_kernel_y * dim_kernel_x >> 1; - /* accumulate over the vector */ - while (colCnt) - { - q31_t inA1 = arm_nn_read_q15x2_ia(&pA); - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inA2 = arm_nn_read_q15x2_ia(&pA2); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA1, inB1, sum); - sum2 = __SMLAD(inA1, inB2, sum2); - sum3 = __SMLAD(inA2, inB1, sum3); - sum4 = __SMLAD(inA2, inB2, sum4); - - colCnt--; - } /* while over colCnt */ - colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x1; - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inB1 = *pB++; - q15_t inA2 = *pA2++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - sum3 += inA2 * inB1; - sum4 += inA2 * inB2; - colCnt--; - } /* while over colCnt */ - *pOut++ = (q15_t)__SSAT(sum >> out_shift, 16); - *pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16); - *pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16); - *pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16); - - /* skip the row computed with A2 */ - pA += ch_im_in * dim_kernel_y * dim_kernel_x; - } /* for over ch_im_out */ - - pOut += ch_im_out; - /* counter reset */ - pBuffer = im_buffer; - } - } - } - -#else - (void)bufferA; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out_y; j++) - { - for (k = 0; k < dim_im_out_x; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel_y; m++) - { - for (n = 0; n < dim_kernel_x; n++) - { - in_row = stride_y * j + m - padding_y; - in_col = stride_x * k + n - padding_x; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel_x * dim_kernel_y + (m * dim_kernel_x + n) * ch_im_in + - l]; - } - } - } - } - Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q15_t)__SSAT((conv_out >> out_shift), 16); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c deleted file mode 100644 index 618f492..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_RGB.c +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_RGB.c - * Description: Q7 version of convolution for RGB image - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Q7 convolution function for RGB image - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in equals 3 - * - * This kernel is written exclusively for convolution with ch_im_in - * equals 3. This applies on the first layer of CNNs which has input - * image with RGB format. - */ - -arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - // check if number of input channels is 3 - if (ch_im_in != 3) - { - return ARM_MATH_SIZE_MISMATCH; - } - // This part implements the im2col function - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* Equivalent to arm_fill_q15(0, pBuffer, ch_im_in) with assumption: ch_im_in = 3 */ - arm_memset_q7((q7_t *)pBuffer, (q7_t)0, 3 * sizeof(q15_t)); - pBuffer += 3; - } - else - { - /* - * Equivalent to: - * arm_q7_to_q15_no_shift( (q7_t*)Im_in+(i_ker_y*dim_im_in+i_ker_x)*3, pBuffer, 3); - */ - - const q7_t *pPixel = Im_in + (i_ker_y * dim_im_in + i_ker_x) * 3; - q31_t buf = arm_nn_read_q7x4(pPixel); - - union arm_nnword top; - union arm_nnword bottom; - - top.word = __SXTB16(buf); - bottom.word = __SXTB16(__ROR(buf, 8)); - -#ifndef ARM_MATH_BIG_ENDIAN - /* - * little-endian, | omit | 3rd | 2nd | 1st | - * MSB LSB - * top | 3rd | 1st |; bottom | omit | 2nd | - * - * version 1, need to swap 2nd and 3rd weight - * *__SIMD32(pBuffer) = top.word; - * *(pBuffer+2) = bottom.half_words[0]; - * - * version 2, no weight shuffling required - */ - *pBuffer++ = top.half_words[0]; - int32_t packed_word = __PKHBT(bottom.word, top.word, 0); - arm_memcpy_q7((q7_t *)pBuffer, (q7_t *)&packed_word, 4); -#else - /* - * big-endian, | 1st | 2nd | 3rd | omit | - * MSB LSB - * top | 2nd | omit |; bottom | 1st | 3rd | - * - * version 1, need to swap 2nd and 3rd weight - * *__SIMD32(pBuffer) = bottom.word; - * *(pBuffer+2) = top.half_words[1]; - * - * version 2, no weight shuffling required - */ - *pBuffer++ = bottom.half_words[0]; - int32_t packed_word = __PKHTB(top.word, bottom.word, 0); - arm_memcpy_q7((q7_t *)pBuffer, (q7_t *)&packed_word, 4); -#endif - pBuffer += 2; - } - } - } - - if (pBuffer == bufferA + 2 * 3 * dim_kernel * dim_kernel) - { - pOut = arm_nn_mat_mult_kernel_q7_q15( - wt, bufferA, ch_im_out, 3 * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* left-over because odd number of output pixels */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - int i; - - for (i = 0; i < ch_im_out; i++) - { - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - q15_t *pB = bufferA; - /* basically each time it process 4 entries */ - uint16_t colCnt = 3 * dim_kernel * dim_kernel >> 2; - - while (colCnt) - { - - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia((const q15_t **)&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia((const q15_t **)&pB); - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = 3 * dim_kernel * dim_kernel & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - } - } -#else - (void)bufferA; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - // check if number of input channels is 3 - if (ch_im_in != 3) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = (bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - /* if-for implementation */ - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c deleted file mode 100644 index e274413..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic.c +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_basic.c - * Description: Q7 version of convolution - * - * $Date: 20. July 2021 - * $Revision: V.1.1.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Basic Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * This basic version is designed to work for any input tensor and weight - * dimension. - */ - -arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* Filling 0 for out-of-bound paddings */ - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - /* Copying the pixel data to column */ - arm_q7_to_q15_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - /* Computation is filed for every 2 columns */ - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = arm_nn_mat_mult_kernel_q7_q15( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* left-over because odd number of output pixels */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - int i; - - for (i = 0; i < ch_im_out; i++) - { - /* Load the accumulator with bias first */ - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - - /* Point to the beging of the im2col buffer */ - const q15_t *pB = bufferA; - - /* Each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 2; - - while (colCnt) - { - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - } - } -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - (void)bufferA; - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - // if-for implementation - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c deleted file mode 100644 index b42a57d..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_basic_nonsquare.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_basic.c - * Description: Q7 version of convolution - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Basic Q7 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns ARM_MATH_SUCCESS - */ - -arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* Filling 0 for out-of-bound paddings */ - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - /* Copying the pixel data to column */ - arm_q7_to_q15_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - /* Computation is filed for every 2 columns */ - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_y * dim_kernel_x) - { - pOut = arm_nn_mat_mult_kernel_q7_q15( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel_y * dim_kernel_x, bias_shift, out_shift, bias, pOut); - - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* left-over because odd number of output pixels */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - int i; - - for (i = 0; i < ch_im_out; i++) - { - /* Load the accumulator with bias first */ - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - - /* Point to the beging of the im2col buffer */ - const q15_t *pB = bufferA; - - /* Each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel_y * dim_kernel_x >> 2; - - while (colCnt) - { - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - } - } -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - (void)bufferA; - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out_y; j++) - { - for (k = 0; k < dim_im_out_x; k++) - { - conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel_y; m++) - { - for (n = 0; n < dim_kernel_x; n++) - { - // if-for implementation - in_row = stride_y * j + m - padding_y; - in_col = stride_x * k + n - padding_x; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in + - l]; - } - } - } - } - Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c deleted file mode 100644 index 51d98fd..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast.c +++ /dev/null @@ -1,380 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_fast.c - * Description: Fast Q7 version of convolution - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Fast Q7 convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in is multiple of 4 ( because of the SIMD32 read and swap ) - * - * ch_im_out is multiple of 2 ( bacause 2x2 mat_mult kernel ) - * - * The im2col converts the Q7 tensor input into Q15 column, which is stored in - * bufferA. There is reordering happenning during this im2col process with - * arm_q7_to_q15_reordered_no_shift. For every four elements, the second and - * third elements are swapped. - * - * The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the - * GEMM computation with the reordered columns. - * - * To speed-up the determination of the padding condition, we split the - * computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}. - * This reduces the total number of boundary condition checks and improves - * the data copying performance. - */ - -arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - /* - * Here we split the entire matrix into three regions depending on the padding situation - * Top: i_out_y from 0 to padding - 1 - * Middle: i_out_y from padding to dim_im_out-padding-1 - * Bottom: i_out_y from dim_im_out-padding to dim_im_out-1 - */ - - /* top part */ - for (i_out_y = 0; i_out_y < padding; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* middle part, here we also divide the x into left, mid and right */ - for (; i_out_y < dim_im_out - padding; i_out_y++) - { - - /* left part */ - for (i_out_x = 0; i_out_x < padding; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - - /* mid part */ - for (; i_out_x < dim_im_out - padding; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - arm_q7_to_q15_reordered_no_shift((q7_t *)Im_in + - (i_ker_y * dim_im_in + i_out_x * stride - padding) * ch_im_in, - pBuffer, - ch_im_in * dim_kernel); - pBuffer += ch_im_in * dim_kernel; - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - - /* right part */ - for (; i_out_x < dim_im_out; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - for (; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* check if there is left-over for compute */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - int i; - - for (i = 0; i < ch_im_out; i++) - { - q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift); - const q15_t *pB = bufferA; - /* each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 2; - - while (colCnt) - { - - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad_reordered(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut = (q7_t)__SSAT((sum >> out_shift), 8); - pOut++; - } - } -#else - (void)bufferA; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out; j++) - { - for (k = 0; k < dim_im_out; k++) - { - conv_out = (bias[i] << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel; m++) - { - for (n = 0; n < dim_kernel; n++) - { - // if-for implementation - in_row = stride * j + m - padding; - in_col = stride * k + n - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l]; - } - } - } - } - Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c deleted file mode 100644 index 25f17bb..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_HWC_q7_fast_nonsquare.c +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_HWC_q7_fast_nonsquare.c - * Description: Fast Q7 version of convolution (non-sqaure shape) - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Fast Q7 convolution function (non-sqaure shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimention x - * @param[in] dim_im_in_y input tensor dimention y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding size x - * @param[in] padding_y padding size y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some constraints: - * ch_im_in is multiple of 4 - * ch_im_out is multiple of 2 - */ - -arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* ----------------------- - * Here we use bufferA as q15_t internally as computation are done with q15_t level - * im2col are done to output in q15_t format from q7_t input - */ - - q15_t *pBuffer = bufferA; - q7_t *pOut = Im_out; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - /* - * Here we split the entire matrix into three regions depending on the padding situation - * Top: i_out_y from 0 to padding - 1 - * Middle: i_out_y from padding to dim_im_out-padding-1 - * Bottom: i_out_y from dim_im_out-padding to dim_im_out-1 - */ - - /* top part */ - for (i_out_y = 0; i_out_y < padding_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* middle part, here we also divide the x into left, mid and right */ - for (; i_out_y < dim_im_out_y - padding_y; i_out_y++) - { - - /* left part */ - for (i_out_x = 0; i_out_x < padding_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - - /* mid part */ - for (; i_out_x < dim_im_out_x - padding_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_out_x * stride_x - padding_x) * ch_im_in, - pBuffer, - ch_im_in * dim_kernel_x); - pBuffer += ch_im_in * dim_kernel_x; - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - - /* right part */ - for (; i_out_x < dim_im_out_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - for (; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - /* This part implements the im2col function */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q15(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, sizeof(q15_t) * ch_im_in); - } - else - { - arm_q7_to_q15_reordered_no_shift( - (q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y) - { - pOut = arm_nn_mat_mult_kernel_q7_q15_reordered( - wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut); - /* counter reset */ - pBuffer = bufferA; - } - } - } - - /* check if there is left-over for compute */ - if (pBuffer != bufferA) - { - const q7_t *pA = wt; - int i; - for (i = 0; i < ch_im_out; i++) - { - q31_t sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - const q15_t *pB = bufferA; - /* basically each time it process 4 entries */ - uint16_t colCnt = ch_im_in * dim_kernel_x * dim_kernel_y >> 2; - - while (colCnt) - { - - q31_t inA1, inA2; - q31_t inB1, inB2; - - pA = read_and_pad_reordered(pA, &inA1, &inA2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA1, inB1, sum); - inB2 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inA2, inB2, sum); - - colCnt--; - } - colCnt = (ch_im_in * dim_kernel_y * dim_kernel_x) & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - sum += inA1 * inB1; - colCnt--; - } - *pOut = (q7_t)__SSAT((sum >> out_shift), 8); - pOut++; - } - } - -#else - (void)bufferA; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i, j, k, l, m, n; - int conv_out; - int in_row, in_col; - - if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0) - { - /* check if the input dimension meets the constraints */ - return ARM_MATH_SIZE_MISMATCH; - } - - for (i = 0; i < ch_im_out; i++) - { - for (j = 0; j < dim_im_out_y; j++) - { - for (k = 0; k < dim_im_out_x; k++) - { - conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (m = 0; m < dim_kernel_y; m++) - { - for (n = 0; n < dim_kernel_x; n++) - { - /* if-for implementation */ - in_row = stride_y * j + m - padding_y; - in_col = stride_x * k + n - padding_x; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - for (l = 0; l < ch_im_in; l++) - { - conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] * - wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in + - l]; - } - } - } - } - Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_fast_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_fast_s16.c deleted file mode 100644 index f509f26..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_fast_s16.c +++ /dev/null @@ -1,241 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_fast_s16.c - * Description: Optimized s16 version of convolution. - * - * $Date: 12 August 2021 - * $Revision: V.1.1.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Basic s16 convolution function. - * - * Refer header file for details. Optimal use case for the DSP/MVE implementation is when input and output channels - * are multiples of 4 or atleast greater than 4. - * - */ - -arm_status arm_convolve_fast_s16(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int64_t *bias_data, - const cmsis_nn_dims *output_dims, - q15_t *output_data) -{ - (void)bias_dims; - if (filter_dims->w * filter_dims->h * input_dims->c >= 512) - { - return ARM_MATH_SIZE_MISMATCH; - } - - if (ctx->buf == NULL && arm_convolve_s8_get_buffer_size(input_dims, filter_dims) > 0) - { - return ARM_MATH_ARGUMENT_ERROR; - } - q15_t *buffer_a = (q15_t *)ctx->buf; - - const int32_t input_batches = input_dims->n; - const int32_t input_x = input_dims->w; - const int32_t input_y = input_dims->h; - const int32_t input_ch = input_dims->c; - const int32_t kernel_x = filter_dims->w; - const int32_t kernel_y = filter_dims->h; - const int32_t output_x = output_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_ch = output_dims->c; - - const int32_t pad_x = conv_params->padding.w; - const int32_t pad_y = conv_params->padding.h; - const int32_t stride_x = conv_params->stride.w; - const int32_t stride_y = conv_params->stride.h; - - const int16_t out_activation_min = conv_params->activation.min; - const int16_t out_activation_max = conv_params->activation.max; - int32_t *output_mult = quant_params->multiplier; - int32_t *output_shift = quant_params->shift; - - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Generate two columns from the input tensor a GEMM computation */ - q15_t *two_column_buf = buffer_a; - q15_t *out = output_data; - /* This part implements the im2col function */ - for (int32_t i_out_y = 0; i_out_y < output_y; i_out_y++) - { - for (int32_t i_out_x = 0; i_out_x < output_x; i_out_x++) - { - for (int32_t i_ker_y = i_out_y * stride_y - pad_y; i_ker_y < i_out_y * stride_y - pad_y + kernel_y; - i_ker_y++) - { - for (int32_t i_ker_x = i_out_x * stride_x - pad_x; i_ker_x < i_out_x * stride_x - pad_x + kernel_x; - i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x) - { - /* Filling 0 for out-of-bound paddings */ - arm_memset_q7((q7_t *)two_column_buf, 0, sizeof(q15_t) * input_ch); - } - else - { - arm_memcpy_q7((q7_t *)two_column_buf, - (const q7_t *)(input_data + (i_ker_y * input_x + i_ker_x) * input_ch), - input_ch * sizeof(q15_t)); - } - two_column_buf += input_ch; - } - } - /* Computation is filed for every 2 columns */ - if (two_column_buf == buffer_a + 2 * input_ch * kernel_y * kernel_x) - { - out = arm_nn_mat_mult_kernel_s16(filter_data, - buffer_a, - output_ch, - output_shift, - output_mult, - out_activation_min, - out_activation_max, - (input_ch * kernel_y * kernel_x), - bias_data, - out); - - /* Counter reset */ - two_column_buf = buffer_a; - } - } - } - - /* Left-over because odd number of output pixels */ - if (two_column_buf != buffer_a) - { - const q7_t *ker_a = filter_data; - int i; - - for (i = 0; i < output_ch; i++) - { - /* Init the accumulator*/ - q31_t sum = 0; - - /* Point to the beginning of the im2col buffer where the input is available as a rearranged column */ - const q15_t *ip_as_col = buffer_a; - - /* 4 multiply and accumulates are done in one loop. */ - uint16_t col_count = (input_ch * kernel_y * kernel_x) >> 2; - - while (col_count) - { - q31_t ker_a1, ker_a2; - q31_t ip_b1, ip_b2; - - ker_a = read_and_pad(ker_a, &ker_a1, &ker_a2); - - ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col); - sum = __SMLAD(ker_a1, ip_b1, sum); - ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col); - sum = __SMLAD(ker_a2, ip_b2, sum); - - col_count--; - } - /* Handle left over mac */ - col_count = input_ch * kernel_y * kernel_x & 0x3; - while (col_count) - { - q7_t ker_a1 = *ker_a++; - q15_t ip_b1 = *ip_as_col++; - sum += ker_a1 * ip_b1; - col_count--; - } - if (bias_data) - { - q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[i]); - q63_t acc_64 = sum + bias_data[i]; - sum = arm_nn_requantize_s64(acc_64, reduced_multiplier, output_shift[i]); - } - else - { - sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]); - } - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - *out++ = (q15_t)sum; - } - } -#else - (void)input_data; - (void)output_data; - (void)bias_data; - (void)filter_data; - (void)buffer_a; - (void)kernel_x; - (void)kernel_y; - (void)pad_x; - (void)pad_y; - (void)stride_x; - (void)stride_y; - (void)out_activation_min; - (void)out_activation_max; - (void)output_mult; - (void)output_shift; - return ARM_MATH_ARGUMENT_ERROR; -#endif - /* Advance to the next batch */ - input_data += (input_x * input_y * input_ch); - output_data += (output_x * output_y * output_ch); - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -int32_t arm_convolve_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) -{ -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t); -#else - (void)input_dims; - (void)filter_dims; - return 0; -#endif -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c deleted file mode 100644 index 9702575..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s16.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Copyright (C) 2010-2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_s16.c - * Description: s16 version of convolution using symmetric quantization. - * - * $Date: January 13, 2022 - * $Revision: V.1.1.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Basic s16 convolution function. - * - * Refer header file for details. Optimal use case for the DSP/MVE implementation is when input and output channels - * are multiples of 4 or atleast greater than 4. - * - */ - -arm_status arm_convolve_s16(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int64_t *bias_data, - const cmsis_nn_dims *output_dims, - q15_t *output_data) -{ - (void)bias_dims; - (void)ctx; - - const int32_t input_batches = input_dims->n; - const int32_t input_x = input_dims->w; - const int32_t input_y = input_dims->h; - const int32_t input_ch = input_dims->c; - const int32_t kernel_x = filter_dims->w; - const int32_t kernel_y = filter_dims->h; - const int32_t output_x = output_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_ch = output_dims->c; - - const int32_t pad_x = conv_params->padding.w; - const int32_t pad_y = conv_params->padding.h; - const int32_t stride_x = conv_params->stride.w; - const int32_t stride_y = conv_params->stride.h; - const int32_t dilation_x = conv_params->dilation.w; - const int32_t dilation_y = conv_params->dilation.h; - - const int32_t out_activation_min = conv_params->activation.min; - const int32_t out_activation_max = conv_params->activation.max; - int32_t *output_mult = quant_params->multiplier; - int32_t *output_shift = quant_params->shift; - - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - for (int32_t i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - const q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[i_out_ch]); - - for (int32_t base_idx_y = -pad_y, i_out_y = 0; i_out_y < output_y; base_idx_y += stride_y, i_out_y++) - { - for (int32_t base_idx_x = -pad_x, i_out_x = 0; i_out_x < output_x; base_idx_x += stride_x, i_out_x++) - { - int64_t conv_out_acc = 0; - - const int32_t start_y_max = (-base_idx_y + dilation_y - 1) / dilation_y; - const int32_t ker_y_start = MAX(0, start_y_max); - const int32_t start_x_max = (-base_idx_x + dilation_x - 1) / dilation_x; - const int32_t ker_x_start = MAX(0, start_x_max); - const int32_t end_min_y = (input_y - base_idx_y + dilation_y - 1) / dilation_y; - const int32_t ker_y_end = MIN(kernel_y, end_min_y); - const int32_t end_min_x = (input_x - base_idx_x + dilation_x - 1) / dilation_x; - const int32_t ker_x_end = MIN(kernel_x, end_min_x); - - for (int32_t i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++) - { - for (int32_t i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++) - { - const int32_t in_row = base_idx_y + dilation_y * i_ker_y; - const int32_t in_col = base_idx_x + dilation_x * i_ker_x; - - for (int32_t i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) - { - conv_out_acc += input_data[(in_row * input_x + in_col) * input_ch + i_input_ch] * - filter_data[i_out_ch * input_ch * kernel_y * kernel_x + - (i_ker_y * kernel_x + i_ker_x) * input_ch + i_input_ch]; - } - } - } - - if (bias_data) - { - conv_out_acc += bias_data[i_out_ch]; - } - - int32_t conv_out = arm_nn_requantize_s64(conv_out_acc, reduced_multiplier, output_shift[i_out_ch]); - conv_out = MAX(conv_out, out_activation_min); - conv_out = MIN(conv_out, out_activation_max); - output_data[i_out_ch + (i_out_y * output_x + i_out_x) * output_ch] = (int16_t)conv_out; - } - } - } - /* Advance to the next batch */ - input_data += (input_x * input_y * input_ch); - output_data += (output_x * output_y * output_ch); - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) -{ - (void)input_dims; - (void)filter_dims; - return 0; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c deleted file mode 100644 index e884b31..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_s8.c +++ /dev/null @@ -1,335 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_s8.c - * Description: s8 version of convolution using symmetric quantization. - * - * $Date: December 14, 2021 - * $Revision: V.2.1.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Basic s8 convolution function. - * - * Refer header file for details. Optimal use case for the DSP/MVE implementation is when input and output channels - * are multiples of 4 or atleast greater than 4. - * - */ - -arm_status arm_convolve_s8(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data) -{ - (void)bias_dims; - - if (ctx->buf == NULL && arm_convolve_s8_get_buffer_size(input_dims, filter_dims) > 0) - { - return ARM_MATH_ARGUMENT_ERROR; - } - q15_t *buffer_a = (q15_t *)ctx->buf; - - const int32_t input_batches = input_dims->n; - const uint16_t input_x = input_dims->w; - const uint16_t input_y = input_dims->h; - const uint16_t input_ch = input_dims->c; - const uint16_t kernel_x = filter_dims->w; - const uint16_t kernel_y = filter_dims->h; - const uint16_t output_x = output_dims->w; - const uint16_t output_y = output_dims->h; - const uint16_t output_ch = output_dims->c; - - const uint16_t pad_x = conv_params->padding.w; - const uint16_t pad_y = conv_params->padding.h; - const uint16_t stride_x = conv_params->stride.w; - const uint16_t stride_y = conv_params->stride.h; - - const int32_t input_offset = conv_params->input_offset; - const int32_t out_offset = conv_params->output_offset; - const int32_t out_activation_min = conv_params->activation.min; - const int32_t out_activation_max = conv_params->activation.max; - int32_t *output_mult = quant_params->multiplier; - int32_t *output_shift = quant_params->shift; - - int i_batch; - for (i_batch = 0; i_batch < input_batches; i_batch++) - { -#if defined(ARM_MATH_MVEI) - /* Generate upto four columns from the input tensor a GEMM computation */ - q7_t *im2col_buf = (q7_t *)buffer_a; - q7_t *out = output_data; - int32_t buffer_fill_cnt = 0; - int32_t padded = 0; - const int32_t num_elem = kernel_x * kernel_y * input_ch; - const int32_t dilation_x = conv_params->dilation.w; - const int32_t dilation_y = conv_params->dilation.h; - - /* This part implements the im2col function */ - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) - { - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) - { - const int32_t base_idx_x = stride_x * i_out_x - pad_x; - const int32_t base_idx_y = stride_y * i_out_y - pad_y; - - for (int32_t i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++) - { - for (int32_t i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++) - { - const int32_t k_y = base_idx_y + dilation_y * i_ker_y; - const int32_t k_x = base_idx_x + dilation_x * i_ker_x; - - if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x) - { - memset(im2col_buf, (int8_t)-input_offset, sizeof(q7_t) * input_ch); - padded = 1; - } - else - { - arm_memcpy_q7(im2col_buf, input_data + (k_y * input_x + k_x) * input_ch, input_ch); - } - im2col_buf += input_ch; - } - } - - buffer_fill_cnt++; - - /* Computation is filed for every 4 columns */ - if (buffer_fill_cnt == 4 && (padded == 0)) - { - buffer_fill_cnt = 0; - out = arm_nn_mat_mul_core_4x_s8(num_elem, - num_elem, - (q7_t *)buffer_a, - filter_data, - output_ch, - conv_params, - quant_params, - bias_data, - out); - im2col_buf = (q7_t *)buffer_a; - } - else if (buffer_fill_cnt == 4 && (padded != 0)) - { - buffer_fill_cnt = 0; - out = arm_nn_mat_mult_s8(filter_data, - (q7_t *)buffer_a, - output_ch, - 4, - output_shift, - output_mult, - out_offset, - input_offset, - 0, - out_activation_min, - out_activation_max, - num_elem, - bias_data, - out); - - im2col_buf = (q7_t *)buffer_a; - padded = 0; - } - } - } - /* Handle left over columns */ - if (buffer_fill_cnt != 0) - { - out = arm_nn_mat_mult_s8(filter_data, - (q7_t *)buffer_a, - output_ch, - buffer_fill_cnt, - output_shift, - output_mult, - out_offset, - input_offset, - 0, - out_activation_min, - out_activation_max, - num_elem, - bias_data, - out); - } -#else // #if defined(ARM_MATH_MVEI) - const uint16_t dilation_x = conv_params->dilation.w; - const uint16_t dilation_y = conv_params->dilation.h; - - int32_t i_out_y, i_out_x, i_ker_y, i_ker_x; - - /* Generate two columns from the input tensor a GEMM computation */ - q15_t *two_column_buf = buffer_a; - q7_t *out = output_data; - - /* This part implements the im2col function */ - for (i_out_y = 0; i_out_y < output_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < output_x; i_out_x++) - { - const int32_t base_idx_y = stride_y * i_out_y - pad_y; - const int32_t base_idx_x = stride_x * i_out_x - pad_x; - - for (i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++) - { - for (i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++) - { - const int32_t k_y = base_idx_y + dilation_y * i_ker_y; - const int32_t k_x = base_idx_x + dilation_x * i_ker_x; - - if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x) - { - /* Filling 0 for out-of-bound paddings */ - memset(two_column_buf, 0, sizeof(q15_t) * input_ch); - } - else - { - /* Copying the pixel data to column */ - arm_q7_to_q15_with_offset( - input_data + (k_y * input_x + k_x) * input_ch, two_column_buf, input_ch, input_offset); - } - two_column_buf += input_ch; - } - } - - /* Computation is filed for every 2 columns */ - if (two_column_buf == buffer_a + 2 * input_ch * kernel_y * kernel_x) - { - out = arm_nn_mat_mult_kernel_s8_s16(filter_data, - buffer_a, - output_ch, - output_shift, - output_mult, - out_offset, - out_activation_min, - out_activation_max, - input_ch * kernel_y * kernel_x, - bias_data, - out); - - /* counter reset */ - two_column_buf = buffer_a; - } - } - } - - /* left-over because odd number of output pixels */ - if (two_column_buf != buffer_a) - { - const q7_t *ker_a = filter_data; - int i; - - for (i = 0; i < output_ch; i++) - { - /* Load the accumulator with bias first */ - q31_t sum = 0; - if (bias_data) - { - sum = bias_data[i]; - } - - /* Point to the beginning of the im2col buffer where the input is available as a rearranged column */ - const q15_t *ip_as_col = buffer_a; - - /* 4 multiply and accumulates are done in one loop. */ -#if defined(ARM_MATH_DSP) - uint16_t col_count = (input_ch * kernel_y * kernel_x) >> 2; - - while (col_count) - { - q31_t ker_a1, ker_a2; - q31_t ip_b1, ip_b2; - - ker_a = read_and_pad(ker_a, &ker_a1, &ker_a2); - - ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col); - sum = __SMLAD(ker_a1, ip_b1, sum); - ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col); - sum = __SMLAD(ker_a2, ip_b2, sum); - - col_count--; - } - /* Handle left over mac */ - col_count = input_ch * kernel_y * kernel_x & 0x3; -#else - uint16_t col_count = input_ch * kernel_y * kernel_x; -#endif - while (col_count) - { - q7_t ker_a1 = *ker_a++; - q15_t ip_b1 = *ip_as_col++; - sum += ker_a1 * ip_b1; - col_count--; - } - - sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]); - sum += out_offset; - sum = MAX(sum, out_activation_min); - sum = MIN(sum, out_activation_max); - *out++ = (q7_t)sum; - } - } -#endif // #if defined(ARM_MATH_MVEI) - /* Advance to the next batch */ - input_data += (input_x * input_y * input_ch); - output_data += (output_x * output_y * output_ch); - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) -{ -#if defined(ARM_MATH_MVEI) - int32_t col_length = input_dims->c * filter_dims->w * filter_dims->h; - // Get number of complete int16 lanes(multiple of 8) for given col_length. This is dependent on - // implementation of arm_nn_mat_mult_s8 - col_length = (col_length + 7) / 8; - // 4 -> number of im2col buffers, 8 -> 8 elements per Q register - return 4 * col_length * 8 * (int32_t)sizeof(int8_t); -#else - return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t); -#endif -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c deleted file mode 100644 index 75bb26f..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s16.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (C) 2021-2022 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_wrapper_s16.c - * Description: s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in - * cmsis-nn to perform the convolution. - * - * $Date: 13 January 2022 - * $Revision: V.1.2.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Convolution layer - * - * Refer header file for details. - * - */ - -arm_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int64_t *bias_data, - const cmsis_nn_dims *output_dims, - q15_t *output_data) -{ -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - if (filter_dims->w * filter_dims->h * input_dims->c < 512 && - (conv_params->dilation.w == 1 && conv_params->dilation.h == 1)) - { - return arm_convolve_fast_s16(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); - } - else - { - return arm_convolve_s16(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); - } -#else - return arm_convolve_s16(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); -#endif -} - -int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params, - const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims) -{ - (void)conv_params; - (void)output_dims; - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - if (filter_dims->w * filter_dims->h * input_dims->c < 512 && - (conv_params->dilation.w == 1 && conv_params->dilation.h == 1)) - { - return arm_convolve_fast_s16_get_buffer_size(input_dims, filter_dims); - } - - return arm_convolve_s16_get_buffer_size(input_dims, filter_dims); -#else - return arm_convolve_s16_get_buffer_size(input_dims, filter_dims); -#endif -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c deleted file mode 100644 index bf1cd70..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_convolve_wrapper_s8.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_convolve_wrapper_s8.c - * Description: s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in - * cmsis-nn to perform the convolution. - * - * $Date: 02. December 2021 - * $Revision: V.1.1.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Convolution layer - * - * Refer header file for details. - * - */ - -arm_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *filter_dims, - const q7_t *filter_data, - const cmsis_nn_dims *bias_dims, - const int32_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data) -{ - if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (input_dims->c % 4 == 0) && - (conv_params->stride.w == 1) && (conv_params->stride.h == 1) && (filter_dims->w == 1) && - (filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1)) - { - return arm_convolve_1x1_s8_fast(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); - } - else if ((output_dims->h == 1) && (input_dims->h == 1) && (filter_dims->h == 1) && (output_dims->w % 4 == 0) && - (input_dims->n == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1)) - { - return arm_convolve_1_x_n_s8(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); - } - else - { - return arm_convolve_s8(ctx, - conv_params, - quant_params, - input_dims, - input_data, - filter_dims, - filter_data, - bias_dims, - bias_data, - output_dims, - output_data); - } -} - -int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params, - const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims) -{ - if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (input_dims->c % 4 == 0) && - (conv_params->stride.w == 1) && (conv_params->stride.h == 1) && (filter_dims->w == 1) && - (filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1)) - { - return arm_convolve_1x1_s8_fast_get_buffer_size(input_dims); - } - else if ((output_dims->h == 1) && (input_dims->h == 1) && (filter_dims->h == 1) && (output_dims->w % 4 == 0) && - (input_dims->n == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1)) - { - return arm_convolve_1_x_n_s8_get_buffer_size(input_dims, filter_dims); - } - else - { - return arm_convolve_s8_get_buffer_size(input_dims, filter_dims); - } -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c deleted file mode 100644 index d5569b3..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_3x3_s8.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_3x3_s8.c - * Description: Optimized s8 depthwise convolution function for channel - * multiplier of 1 and 3x3 kernel size. - * - * $Date: 09. October 2020 - * $Revision: V.2.0.1 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Optimized s8 depthwise convolution function with constraint that - * in_channel == out_channel and kernel_x == kernel_y == 3 with pads at most 1 - * - * Refer prototype header file for details. - * - */ - -arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - (void)ctx; - (void)bias_dims; - - const int32_t input_x = input_dims->w; - const int32_t input_y = input_dims->h; - const int32_t input_ch = input_dims->c; - const int32_t output_ch = output_dims->c; - const int32_t pad_x = dw_conv_params->padding.w; - const int32_t pad_y = dw_conv_params->padding.h; - const int32_t stride_x = dw_conv_params->stride.w; - const int32_t stride_y = dw_conv_params->stride.h; - const int32_t *output_shift = quant_params->shift; - const int32_t *output_mult = quant_params->multiplier; - const int32_t output_x = output_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_offset = dw_conv_params->output_offset; - const int32_t input_offset = dw_conv_params->input_offset; - const int32_t output_activation_min = dw_conv_params->activation.min; - const int32_t output_activation_max = dw_conv_params->activation.max; - - /* Check input constraints input_ch == output_ch */ - if (input_ch != output_ch) - { - return ARM_MATH_SIZE_MISMATCH; - } - /* Check input constraints pad_x <= 1 */ - if (pad_x > 1 || filter_dims->w != 3 || filter_dims->h != 3) - { - return ARM_MATH_ARGUMENT_ERROR; - } - - for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h) - { - for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w) - { - int32_t in_ch = 0; - int32_t ker_w_start = MAX(0, -in_w); - - for (; in_ch <= (input_ch - 4); in_ch += 4) - { - int32_t out_buff0 = bias[in_ch + 0]; - int32_t out_buff1 = bias[in_ch + 1]; - int32_t out_buff2 = bias[in_ch + 2]; - int32_t out_buff3 = bias[in_ch + 3]; - - const int8_t *input_ptr = input + (in_h + ker_h_start) * (input_ch * input_x) + in_w * input_ch + in_ch; - const int8_t *kernel_ptr = kernel + ker_h_start * (input_ch * 3) + in_ch; - - for (int32_t ker_h = ker_h_start; ker_h < MIN(3, input_y - in_h); ++ker_h) - { - int32_t in_val = 0; - int32_t ker_val = 0; - - if (ker_w_start == 0) - { - in_val = arm_nn_read_q7x4(input_ptr); - ker_val = arm_nn_read_q7x4(kernel_ptr); - - out_buff0 += ((int8_t)in_val + input_offset) * (int8_t)ker_val; - out_buff1 += ((int8_t)(in_val >> 8) + input_offset) * (int8_t)(ker_val >> 8); - out_buff2 += ((int8_t)(in_val >> 16) + input_offset) * (int8_t)(ker_val >> 16); - out_buff3 += ((int8_t)(in_val >> 24) + input_offset) * (int8_t)(ker_val >> 24); - } - - in_val = arm_nn_read_q7x4(input_ptr + input_ch); - ker_val = arm_nn_read_q7x4(kernel_ptr + input_ch); - - out_buff0 += ((int8_t)in_val + input_offset) * (int8_t)ker_val; - out_buff1 += ((int8_t)(in_val >> 8) + input_offset) * (int8_t)(ker_val >> 8); - out_buff2 += ((int8_t)(in_val >> 16) + input_offset) * (int8_t)(ker_val >> 16); - out_buff3 += ((int8_t)(in_val >> 24) + input_offset) * (int8_t)(ker_val >> 24); - - if ((input_x - in_w) >= 3) - { - in_val = arm_nn_read_q7x4(input_ptr + (input_ch << 1)); - ker_val = arm_nn_read_q7x4(kernel_ptr + (input_ch << 1)); - - out_buff0 += ((int8_t)in_val + input_offset) * (int8_t)ker_val; - out_buff1 += ((int8_t)(in_val >> 8) + input_offset) * (int8_t)(ker_val >> 8); - out_buff2 += ((int8_t)(in_val >> 16) + input_offset) * (int8_t)(ker_val >> 16); - out_buff3 += ((int8_t)(in_val >> 24) + input_offset) * (int8_t)(ker_val >> 24); - } - - input_ptr += (input_ch * input_x); - kernel_ptr += (input_ch * 3); - } - - out_buff0 = arm_nn_requantize(out_buff0, output_mult[in_ch + 0], output_shift[in_ch + 0]); - out_buff1 = arm_nn_requantize(out_buff1, output_mult[in_ch + 1], output_shift[in_ch + 1]); - out_buff2 = arm_nn_requantize(out_buff2, output_mult[in_ch + 2], output_shift[in_ch + 2]); - out_buff3 = arm_nn_requantize(out_buff3, output_mult[in_ch + 3], output_shift[in_ch + 3]); - - out_buff0 += output_offset; - out_buff1 += output_offset; - out_buff2 += output_offset; - out_buff3 += output_offset; - - out_buff0 = MIN(MAX(out_buff0, output_activation_min), output_activation_max); - out_buff1 = MIN(MAX(out_buff1, output_activation_min), output_activation_max); - out_buff2 = MIN(MAX(out_buff2, output_activation_min), output_activation_max); - out_buff3 = MIN(MAX(out_buff3, output_activation_min), output_activation_max); - - output[out_idx++] = (int8_t)out_buff0; - output[out_idx++] = (int8_t)out_buff1; - output[out_idx++] = (int8_t)out_buff2; - output[out_idx++] = (int8_t)out_buff3; - } - - // Leftover - for (; in_ch < input_ch; ++in_ch) - { - int32_t out_buff = bias[in_ch]; - - const int8_t *input_ptr = input + (in_h + ker_h_start) * (input_ch * input_x) + in_w * input_ch + in_ch; - const int8_t *kernel_ptr = kernel + ker_h_start * (input_ch * 3) + in_ch; - - for (int32_t ker_h = ker_h_start; ker_h < MIN(3, input_y - in_h); ++ker_h) - { - if (ker_w_start == 0) - { - out_buff += (*(input_ptr) + input_offset) * *(kernel_ptr); - } - - out_buff += (*(input_ptr + input_ch) + input_offset) * *(kernel_ptr + input_ch); - - if ((input_x - in_w) >= 3) - { - out_buff += (*(input_ptr + (input_ch << 1)) + input_offset) * *(kernel_ptr + (input_ch << 1)); - } - - input_ptr += (input_ch * input_x); - kernel_ptr += (input_ch * 3); - } - - out_buff = arm_nn_requantize(out_buff, output_mult[in_ch], output_shift[in_ch]); - out_buff += output_offset; - out_buff = MIN(MAX(out_buff, output_activation_min), output_activation_max); - output[out_idx++] = (int8_t)out_buff; - } - } - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s16.c deleted file mode 100644 index 42e4bbd..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s16.c +++ /dev/null @@ -1,292 +0,0 @@ -/* - * Copyright (C) 2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_s16.c - * Description: s16 version of depthwise convolution. - * - * $Date: 26. Jan 2022 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -static void __attribute__((unused)) depthwise_conv_s16_mult_4_s16(const int16_t *input, - const int32_t input_x, - const int32_t input_y, - const int32_t input_ch, - const int8_t *kernel, - const int32_t output_ch, - const int32_t ch_mult, - const int32_t kernel_x, - const int32_t kernel_y, - const int32_t pad_x, - const int32_t pad_y, - const int32_t stride_x, - const int32_t stride_y, - const int64_t *bias, - int16_t *output, - const int32_t *output_shift, - const int32_t *output_mult, - const int32_t output_x, - const int32_t output_y, - const int32_t output_activation_min, - const int32_t output_activation_max) -{ - for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h) - { - for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w) - { - for (int32_t in_ch = 0, out_ch = 0, ker_w_start = MAX(0, -in_w); out_ch < output_ch; - ++in_ch, out_ch += ch_mult) - { - for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4) - { - int32_t out_buff32[4] = {REDUCE_MULTIPLIER(output_mult[out_ch + 0 + mult_tile]), - REDUCE_MULTIPLIER(output_mult[out_ch + 1 + mult_tile]), - REDUCE_MULTIPLIER(output_mult[out_ch + 2 + mult_tile]), - REDUCE_MULTIPLIER(output_mult[out_ch + 3 + mult_tile])}; - - int64_t out_buff[4] = {0, 0, 0, 0}; - - if (bias) - { - out_buff[0] = bias[out_ch + 0 + mult_tile]; - out_buff[1] = bias[out_ch + 1 + mult_tile]; - out_buff[2] = bias[out_ch + 2 + mult_tile]; - out_buff[3] = bias[out_ch + 3 + mult_tile]; - } - - for (int32_t ker_h = ker_h_start; ker_h < MIN(kernel_y, input_y - in_h); ++ker_h) - { - int32_t ker_idx = ker_h * (output_ch * kernel_x) + ker_w_start * output_ch + out_ch; - int32_t in_idx = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch; -#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) -#pragma clang loop unroll(disable) -#endif - for (int32_t ker_w = ker_w_start; ker_w < MIN(kernel_x, input_x - in_w); - ++ker_w, ker_idx += output_ch) - { - // TODO: Unroll of 4 with 64 bit accumulator will probably result in too much register - // spills. Try with unroll of 2 when enabling this. - int32_t in_val = input[in_idx + ker_w * input_ch]; - out_buff[0] += in_val * kernel[ker_idx + 0 + mult_tile]; - out_buff[1] += in_val * kernel[ker_idx + 1 + mult_tile]; - out_buff[2] += in_val * kernel[ker_idx + 2 + mult_tile]; - out_buff[3] += in_val * kernel[ker_idx + 3 + mult_tile]; - } - } - - out_buff32[0] = - arm_nn_requantize_s64(out_buff[0], out_buff32[0], output_shift[out_ch + 0 + mult_tile]); - out_buff32[1] = - arm_nn_requantize_s64(out_buff[1], out_buff32[1], output_shift[out_ch + 1 + mult_tile]); - out_buff32[2] = - arm_nn_requantize_s64(out_buff[2], out_buff32[2], output_shift[out_ch + 2 + mult_tile]); - out_buff32[3] = - arm_nn_requantize_s64(out_buff[3], out_buff32[3], output_shift[out_ch + 3 + mult_tile]); - - out_buff32[0] = MIN(MAX(out_buff32[0], output_activation_min), output_activation_max); - out_buff32[1] = MIN(MAX(out_buff32[1], output_activation_min), output_activation_max); - out_buff32[2] = MIN(MAX(out_buff32[2], output_activation_min), output_activation_max); - out_buff32[3] = MIN(MAX(out_buff32[3], output_activation_min), output_activation_max); - - output[out_idx++] = (int16_t)out_buff32[0]; - output[out_idx++] = (int16_t)out_buff32[1]; - output[out_idx++] = (int16_t)out_buff32[2]; - output[out_idx++] = (int16_t)out_buff32[3]; - } - } - } - } -} - -static void depthwise_conv_s16_generic_s16(const int16_t *input, - const uint16_t input_batches, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_ch, - const int8_t *kernel, - const uint16_t ch_mult, - const uint16_t kernel_x, - const uint16_t kernel_y, - const uint16_t pad_x, - const uint16_t pad_y, - const uint16_t stride_x, - const uint16_t stride_y, - const int64_t *bias, - int16_t *output, - const int32_t *output_shift, - const int32_t *output_mult, - const uint16_t output_x, - const uint16_t output_y, - const int32_t output_activation_min, - const int32_t output_activation_max, - const uint16_t dilation_x, - const uint16_t dilation_y) - -{ - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) - { - const int16_t base_idx_y = (i_out_y * stride_y) - pad_y; - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) - { - const int16_t base_idx_x = (i_out_x * stride_x) - pad_x; - for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) - { - for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++) - { - const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult; - - const q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[idx_out_ch]); - int64_t acc_0 = 0; - - int ker_y_start; - int ker_x_start; - int ker_y_end; - int ker_x_end; - - if (dilation_x > 1) - { - const int32_t start_x_max = (-base_idx_x + dilation_x - 1) / dilation_x; - ker_x_start = MAX(0, start_x_max); - const int32_t end_min_x = (input_x - base_idx_x + dilation_x - 1) / dilation_x; - ker_x_end = MIN(kernel_x, end_min_x); - } - else - { - ker_x_start = MAX(0, -base_idx_x); - ker_x_end = MIN(kernel_x, input_x - base_idx_x); - } - - if (dilation_y > 1) - { - const int32_t start_y_max = (-base_idx_y + dilation_y - 1) / dilation_y; - ker_y_start = MAX(0, start_y_max); - const int32_t end_min_y = (input_y - base_idx_y + dilation_y - 1) / dilation_y; - ker_y_end = MIN(kernel_y, end_min_y); - } - else - { - ker_y_start = MAX(0, -base_idx_y); - ker_y_end = MIN(kernel_y, input_y - base_idx_y); - } - - if (bias) - { - acc_0 = bias[idx_out_ch]; - } - - for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++) - { - const int32_t idx_y = base_idx_y + dilation_y * i_ker_y; - for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++) - { - const int32_t idx_x = base_idx_x + dilation_x * i_ker_x; - int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch; - int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch; - - acc_0 += input[idx_0] * kernel[ker_idx_0]; - } - } - - /* Requantize and clamp output to provided range */ - int32_t result = arm_nn_requantize_s64(acc_0, reduced_multiplier, output_shift[idx_out_ch]); - result = MAX(result, output_activation_min); - result = MIN(result, output_activation_max); - *output++ = (int16_t)result; - } - } - } - } - /* Advance to the next batch */ - input += (input_x * input_y * input_ch); - } -} - -/* - * Basic s16 depthwise convolution function. - * - * Refer header file for details. - * - */ -arm_status arm_depthwise_conv_s16(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int64_t *bias, - const cmsis_nn_dims *output_dims, - q15_t *output) -{ - const uint16_t dilation_x = dw_conv_params->dilation.w; - const uint16_t dilation_y = dw_conv_params->dilation.h; - - (void)bias_dims; - (void)ctx; - - depthwise_conv_s16_generic_s16(input, - input_dims->n, - input_dims->w, - input_dims->h, - input_dims->c, - kernel, - dw_conv_params->ch_mult, - filter_dims->w, - filter_dims->h, - dw_conv_params->padding.w, - dw_conv_params->padding.h, - dw_conv_params->stride.w, - dw_conv_params->stride.h, - bias, - output, - quant_params->shift, - quant_params->multiplier, - output_dims->w, - output_dims->h, - dw_conv_params->activation.min, - dw_conv_params->activation.max, - dilation_x, - dilation_y); - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c deleted file mode 100644 index 297b7af..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8.c +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_s8.c - * Description: s8 version of depthwise convolution. - * - * $Date: 30. Dec 2021 - * $Revision: V.2.7.1 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -static void depthwise_conv_s8_mult_4(const int8_t *input, - const int32_t input_x, - const int32_t input_y, - const int32_t input_ch, - const int8_t *kernel, - const int32_t output_ch, - const int32_t ch_mult, - const int32_t kernel_x, - const int32_t kernel_y, - const int32_t pad_x, - const int32_t pad_y, - const int32_t stride_x, - const int32_t stride_y, - const int32_t *bias, - int8_t *output, - const int32_t *output_shift, - const int32_t *output_mult, - const int32_t output_x, - const int32_t output_y, - const int32_t output_offset, - const int32_t input_offset, - const int32_t output_activation_min, - const int32_t output_activation_max) -{ - for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h) - { - for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w) - { - for (int32_t in_ch = 0, out_ch = 0, ker_w_start = MAX(0, -in_w); out_ch < output_ch; - ++in_ch, out_ch += ch_mult) - { - for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4) - { - int32_t out_buff[4] = {0, 0, 0, 0}; - if (bias) - { - out_buff[0] = bias[out_ch + 0 + mult_tile]; - out_buff[1] = bias[out_ch + 1 + mult_tile]; - out_buff[2] = bias[out_ch + 2 + mult_tile]; - out_buff[3] = bias[out_ch + 3 + mult_tile]; - } - - for (int32_t ker_h = ker_h_start; ker_h < MIN(kernel_y, input_y - in_h); ++ker_h) - { - int32_t ker_idx = ker_h * (output_ch * kernel_x) + ker_w_start * output_ch + out_ch; - int32_t in_idx = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch; -#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) -#pragma clang loop unroll(disable) -#endif - for (int32_t ker_w = ker_w_start; ker_w < MIN(kernel_x, input_x - in_w); - ++ker_w, ker_idx += output_ch) - { - int32_t in_val = input[in_idx + ker_w * input_ch] + input_offset; - out_buff[0] += in_val * kernel[ker_idx + 0 + mult_tile]; - out_buff[1] += in_val * kernel[ker_idx + 1 + mult_tile]; - out_buff[2] += in_val * kernel[ker_idx + 2 + mult_tile]; - out_buff[3] += in_val * kernel[ker_idx + 3 + mult_tile]; - } - } -#if defined(ARM_MATH_MVEI) - (void)out_idx; - int32x4_t res = vldrwq_s32(out_buff); - res = arm_requantize_mve_32x4(res, - vldrwq_s32(&output_mult[out_ch + mult_tile]), - vldrwq_s32(&output_shift[out_ch + mult_tile])); - res = vaddq_n_s32(res, output_offset); - - res = vmaxq_s32(res, vdupq_n_s32(output_activation_min)); - res = vminq_s32(res, vdupq_n_s32(output_activation_max)); - vstrbq_s32(output, res); - output += 4; -#else - out_buff[0] = arm_nn_requantize( - out_buff[0], output_mult[out_ch + 0 + mult_tile], output_shift[out_ch + 0 + mult_tile]); - out_buff[1] = arm_nn_requantize( - out_buff[1], output_mult[out_ch + 1 + mult_tile], output_shift[out_ch + 1 + mult_tile]); - out_buff[2] = arm_nn_requantize( - out_buff[2], output_mult[out_ch + 2 + mult_tile], output_shift[out_ch + 2 + mult_tile]); - out_buff[3] = arm_nn_requantize( - out_buff[3], output_mult[out_ch + 3 + mult_tile], output_shift[out_ch + 3 + mult_tile]); - - out_buff[0] += output_offset; - out_buff[1] += output_offset; - out_buff[2] += output_offset; - out_buff[3] += output_offset; - - out_buff[0] = MIN(MAX(out_buff[0], output_activation_min), output_activation_max); - out_buff[1] = MIN(MAX(out_buff[1], output_activation_min), output_activation_max); - out_buff[2] = MIN(MAX(out_buff[2], output_activation_min), output_activation_max); - out_buff[3] = MIN(MAX(out_buff[3], output_activation_min), output_activation_max); - - output[out_idx++] = (int8_t)out_buff[0]; - output[out_idx++] = (int8_t)out_buff[1]; - output[out_idx++] = (int8_t)out_buff[2]; - output[out_idx++] = (int8_t)out_buff[3]; - -#endif - } - } - } - } -} - -static void depthwise_conv_s8_generic(const q7_t *input, - const uint16_t input_batches, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_ch, - const q7_t *kernel, - const uint16_t output_ch, - const uint16_t ch_mult, - const uint16_t kernel_x, - const uint16_t kernel_y, - const uint16_t pad_x, - const uint16_t pad_y, - const uint16_t stride_x, - const uint16_t stride_y, - const int32_t *bias, - q7_t *output, - const int32_t *output_shift, - const int32_t *output_mult, - const uint16_t output_x, - const uint16_t output_y, - const int32_t output_offset, - const int32_t input_offset, - const int32_t output_activation_min, - const int32_t output_activation_max, - const uint16_t dilation_x, - const uint16_t dilation_y) - -{ - (void)output_ch; - int i_out = 0; - int i_batch; - - for (i_batch = 0; i_batch < input_batches; i_batch++) - { - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) - { - const int16_t base_idx_y = (i_out_y * stride_y) - pad_y; - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) - { - const int16_t base_idx_x = (i_out_x * stride_x) - pad_x; - for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) - { - for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++) - { - const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult; - int32_t acc_0 = 0; - - int ker_y_start; - int ker_x_start; - int ker_y_end; - int ker_x_end; - - if (dilation_x > 1) - { - const int32_t start_x_max = (-base_idx_x + dilation_x - 1) / dilation_x; - ker_x_start = MAX(0, start_x_max); - const int32_t end_min_x = (input_x - base_idx_x + dilation_x - 1) / dilation_x; - ker_x_end = MIN(kernel_x, end_min_x); - } - else - { - ker_x_start = MAX(0, -base_idx_x); - ker_x_end = MIN(kernel_x, input_x - base_idx_x); - } - - if (dilation_y > 1) - { - const int32_t start_y_max = (-base_idx_y + dilation_y - 1) / dilation_y; - ker_y_start = MAX(0, start_y_max); - const int32_t end_min_y = (input_y - base_idx_y + dilation_y - 1) / dilation_y; - ker_y_end = MIN(kernel_y, end_min_y); - } - else - { - ker_y_start = MAX(0, -base_idx_y); - ker_y_end = MIN(kernel_y, input_y - base_idx_y); - } - - if (bias) - { - acc_0 = bias[idx_out_ch]; - } - - for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++) - { - const int32_t idx_y = base_idx_y + dilation_y * i_ker_y; - for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++) - { - const int32_t idx_x = base_idx_x + dilation_x * i_ker_x; - int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch; - int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch; - - acc_0 += (input[idx_0] + input_offset) * kernel[ker_idx_0]; - } - } - - /* Requantize and clamp output to provided range */ - acc_0 = arm_nn_requantize(acc_0, output_mult[idx_out_ch], output_shift[idx_out_ch]); - acc_0 += output_offset; - acc_0 = MAX(acc_0, output_activation_min); - acc_0 = MIN(acc_0, output_activation_max); - - output[i_out++] = acc_0; - } - } - } - } - /* Advance to the next batch */ - input += (input_x * input_y * input_ch); - } -} - -/* - * Basic s8 depthwise convolution function. - * - * Refer header file for details. - * Optimization using DSP extension is not available for the generic case where channel multiplier is > 1. - * - */ -arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - const uint16_t dilation_x = dw_conv_params->dilation.w; - const uint16_t dilation_y = dw_conv_params->dilation.h; - - (void)dw_conv_params->dilation; - (void)bias_dims; - (void)ctx; - - if (dw_conv_params->ch_mult % 4 == 0 && input_dims->n == 1 && dw_conv_params->dilation.w == 1 && - dw_conv_params->dilation.h == 1) - { - depthwise_conv_s8_mult_4(input, - input_dims->w, - input_dims->h, - input_dims->c, - kernel, - output_dims->c, - dw_conv_params->ch_mult, - filter_dims->w, - filter_dims->h, - dw_conv_params->padding.w, - dw_conv_params->padding.h, - dw_conv_params->stride.w, - dw_conv_params->stride.h, - bias, - output, - quant_params->shift, - quant_params->multiplier, - output_dims->w, - output_dims->h, - dw_conv_params->output_offset, - dw_conv_params->input_offset, - dw_conv_params->activation.min, - dw_conv_params->activation.max); - } - else - { - depthwise_conv_s8_generic(input, - input_dims->n, - input_dims->w, - input_dims->h, - input_dims->c, - kernel, - output_dims->c, - dw_conv_params->ch_mult, - filter_dims->w, - filter_dims->h, - dw_conv_params->padding.w, - dw_conv_params->padding.h, - dw_conv_params->stride.w, - dw_conv_params->stride.h, - bias, - output, - quant_params->shift, - quant_params->multiplier, - output_dims->w, - output_dims->h, - dw_conv_params->output_offset, - dw_conv_params->input_offset, - dw_conv_params->activation.min, - dw_conv_params->activation.max, - dilation_x, - dilation_y); - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c deleted file mode 100644 index 1edac04..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_s8_opt.c +++ /dev/null @@ -1,433 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_s8_opt.c - * Description: Optimized s8 depthwise separable convolution function for - * channel multiplier of 1. - * - * $Date: January 26, 2021 - * $Revision: V.2.0.3 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel - * - * Refer prototype header file for details. - * - */ - -arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - - const int32_t input_ch = input_dims->c; - const int32_t output_ch = output_dims->c; - - /* Check input constraints input_ch == output_ch */ - if (input_ch != output_ch) - { - return ARM_MATH_SIZE_MISMATCH; - } - - if (ctx->buf == NULL && arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims) > 0) - { - return ARM_MATH_ARGUMENT_ERROR; - } -#ifdef ARM_MATH_DSP - const int32_t input_x = input_dims->w; - const int32_t input_y = input_dims->h; - const int32_t kernel_x = filter_dims->w; - const int32_t kernel_y = filter_dims->h; - const int32_t pad_x = dw_conv_params->padding.w; - const int32_t pad_y = dw_conv_params->padding.h; - const int32_t stride_x = dw_conv_params->stride.w; - const int32_t stride_y = dw_conv_params->stride.h; - const int32_t *output_shift = quant_params->shift; - const int32_t *output_mult = quant_params->multiplier; - const int32_t output_x = output_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_offset = dw_conv_params->output_offset; - const int32_t input_offset = dw_conv_params->input_offset; - const int32_t output_activation_min = dw_conv_params->activation.min; - const int32_t output_activation_max = dw_conv_params->activation.max; - q15_t *buffer_a = (q15_t *)ctx->buf; - -#ifdef ARM_MATH_MVEI - (void)bias_dims; - /* Generate two columns from the input tensor */ - q7_t *lhs_buffer = (q7_t *)buffer_a; - q7_t *out = output; - int padded = 0; - int buffer_count = 0; - const int32_t kernel_size = kernel_x * kernel_y; - - /* This part implements the im2col function */ - for (int i_out_y = 0, base_idx_y = -pad_y; i_out_y < output_y; base_idx_y += stride_y, i_out_y++) - { - for (int i_out_x = 0, base_idx_x = -pad_x; i_out_x < output_x; base_idx_x += stride_x, i_out_x++) - { - for (int i_ker_y = base_idx_y; i_ker_y < base_idx_y + kernel_y; i_ker_y++) - { - for (int i_ker_x = base_idx_x; i_ker_x < base_idx_x + kernel_x; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x) - { - arm_memset_q7(lhs_buffer, (int8_t)-input_offset, (uint32_t)input_ch); - padded = 1; - } - else - { - arm_memcpy_q7(lhs_buffer, input + (i_ker_y * input_x + i_ker_x) * input_ch, (uint32_t)input_ch); - } - lhs_buffer += input_ch; - } - } - buffer_count++; - - if (buffer_count == 4) - { - lhs_buffer = (q7_t *)buffer_a; - if (padded == 0) - { - out = arm_nn_depthwise_conv_nt_t_s8(lhs_buffer, - kernel, - input_offset, - input_ch, - output_shift, - output_mult, - output_offset, - output_activation_min, - output_activation_max, - kernel_size, - bias, - out); - } - else - { - out = arm_nn_depthwise_conv_nt_t_padded_s8(lhs_buffer, - kernel, - input_offset, - input_ch, - output_shift, - output_mult, - output_offset, - output_activation_min, - output_activation_max, - kernel_size, - bias, - out); - padded = 0; - } - buffer_count = 0; - } - } - } - - /* Handle left over buffers */ - lhs_buffer = (q7_t *)buffer_a; - - for (int i_buf = 0; i_buf < buffer_count; i_buf++) - { - int32_t loop_count = (input_ch + 3) / 4; - - int32_t num_ch_to_process = input_ch; - for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; num_ch_to_process -= 4, offset += 4, i_loop_cnt++) - { - const int8_t *col_0 = lhs_buffer + (kernel_size * input_ch * i_buf) + offset; - const int8_t *row_0 = kernel + offset; - int32x4_t out_0 = vldrwq_s32(&bias[offset]); - - for (int i_ker = 0; i_ker < kernel_size; i_ker++) - { - const int32x4_t ker_0 = vldrbq_s32(row_0); - - int32x4_t ip_0 = vldrbq_s32(col_0); - ip_0 = vaddq_n_s32(ip_0, input_offset); - out_0 += vmulq_s32(ip_0, ker_0); - - col_0 += input_ch; - row_0 += input_ch; - } - - const int32x4_t mult = vldrwq_s32(&output_mult[offset]); - const int32x4_t shift = vldrwq_s32(&output_shift[offset]); - - out_0 = arm_requantize_mve_32x4(out_0, mult, shift); - out_0 = vaddq_n_s32(out_0, output_offset); - out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min)); - out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max)); - mve_pred16_t p = vctp32q((uint32_t)num_ch_to_process); - vstrbq_p_s32(out, out_0, p); - - out += 4; - } - - const int tail_ch = input_ch & 0x3; - if (tail_ch != 0) - { - out -= (4 - tail_ch); - } - } - -#else // ARM_MATH_DSP - (void)bias_dims; - /* Run the following code in cores using DSP extension */ - q15_t *const col_buffer_start = buffer_a; - q15_t *col_buffer = col_buffer_start; - const int32_t *const bias_start_pos = bias; - const q31_t *const out_mult_start_pos = output_mult; - const q31_t *const out_shift_start_pos = output_shift; - uint16_t row_count; - uint16_t row_shift; - - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) - { - const int16_t base_idx_y = (i_out_y * stride_y) - pad_y; - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) - { - const int16_t base_idx_x = (i_out_x * stride_x) - pad_x; - - /* Out of bounds is only considered for the y axis as it provides a contiguous zero'ing opportunity than - along the x axis */ - const int ker_y_start = MAX(0, -base_idx_y); - /* Condition for kernel end dimension: (base_idx_y + ker_y_end) < input_y */ - const int ker_y_end = MIN(kernel_y, input_y - base_idx_y); - - int32_t index = 0; - if (ker_y_start != 0) - { - memset(&col_buffer[index], 0, (kernel_x * input_ch) * ker_y_start * sizeof(q15_t)); - index += (kernel_x * input_ch) * ker_y_start; - } - - for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++) - { - const int32_t idx_y = base_idx_y + i_ker_y; - - for (int i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++) - { - const int32_t idx_x = base_idx_x + i_ker_x; - if (idx_x < 0 || idx_x >= input_x) - { - memset(&col_buffer[index], 0, input_ch * sizeof(q15_t)); - } - else - { - arm_q7_to_q15_with_offset((q7_t *)input + (idx_y * input_x + idx_x) * input_ch, - &col_buffer[index], - input_ch, - input_offset); - } - index += input_ch; - } - } - - const int diff = kernel_y - ker_y_end; - if (diff != 0) - { - memset(&col_buffer[index], 0, (kernel_x * input_ch) * diff * sizeof(q15_t)); - } - - row_count = output_ch / 4; - row_shift = 0; - bias = bias_start_pos; - output_mult = out_mult_start_pos; - output_shift = out_shift_start_pos; - - while (row_count) - { - q31_t sum = *bias++; - q31_t sum_2 = *bias++; - q31_t sum_3 = *bias++; - q31_t sum_4 = *bias++; - - uint16_t col_count = (kernel_x * kernel_y) / 2; - q15_t *col_pos = col_buffer_start + row_shift; - const q7_t *row_pos = kernel + row_shift; - row_shift += 4; - - while (col_count) - { - /* General idea is to read 4 + 4 (input, kernel) pair and re-arrange them in the right order to - use in a SMLAD instruction . One run of this loop produces 4 partial outputs with 8 MACs. */ - /* Note: variable names can be improved here to align with rows and columns. */ - q31_t ip_a1, ip_a2, ip_b1, ip_b2, op_a, op_b, op_c; - /* Read 4 weights */ - ip_b1 = arm_nn_read_q7x4(row_pos); - ip_a1 = arm_nn_read_q7x4(row_pos + input_ch); - op_a = arm_nn_read_q15x2(col_pos); - op_b = arm_nn_read_q15x2(col_pos + input_ch); - - ip_a2 = __SXTB16(ip_b1); - ip_b1 = __SXTB16(__ROR(ip_b1, 8)); - - ip_b2 = __SXTB16(ip_a1); - ip_a1 = __SXTB16(__ROR(ip_a1, 8)); - - op_c = __PKHBT(op_b, op_a, 16); - op_a = __PKHTB(op_b, op_a, 16); - op_b = __PKHBT(ip_b2, ip_a2, 16); - sum = __SMLAD(op_c, op_b, sum); - - op_b = __PKHBT(ip_b1, ip_a1, 16); - sum_2 = __SMLAD(op_a, op_b, sum_2); - - op_a = arm_nn_read_q15x2(col_pos + 2); - op_b = arm_nn_read_q15x2(col_pos + input_ch + 2); - - op_c = __PKHBT(op_b, op_a, 16); - op_a = __PKHTB(op_b, op_a, 16); - op_b = __PKHTB(ip_a2, ip_b2, 16); - sum_3 = __SMLAD(op_c, op_b, sum_3); - - op_b = __PKHTB(ip_a1, ip_b1, 16); - sum_4 = __SMLAD(op_a, op_b, sum_4); - - row_pos += input_ch << 1; - col_pos += input_ch << 1; - col_count--; - } - - col_count = (kernel_x * kernel_y) & 0x1; - while (col_count) - { - sum += row_pos[0] * col_pos[0]; - sum_2 += row_pos[1] * col_pos[1]; - sum_3 += row_pos[2] * col_pos[2]; - sum_4 += row_pos[3] * col_pos[3]; - - row_pos += input_ch; - col_pos += input_ch; - - col_count--; - } - sum = arm_nn_requantize(sum, *output_mult++, *output_shift++); - sum += output_offset; - sum = MAX(sum, output_activation_min); - sum = MIN(sum, output_activation_max); - *output++ = (q7_t)sum; - - sum_2 = arm_nn_requantize(sum_2, *output_mult++, *output_shift++); - sum_2 += output_offset; - sum_2 = MAX(sum_2, output_activation_min); - sum_2 = MIN(sum_2, output_activation_max); - *output++ = (q7_t)sum_2; - sum_3 = arm_nn_requantize(sum_3, *output_mult++, *output_shift++); - sum_3 += output_offset; - sum_3 = MAX(sum_3, output_activation_min); - sum_3 = MIN(sum_3, output_activation_max); - *output++ = (q7_t)sum_3; - - sum_4 = arm_nn_requantize(sum_4, *output_mult++, *output_shift++); - sum_4 += output_offset; - sum_4 = MAX(sum_4, output_activation_min); - sum_4 = MIN(sum_4, output_activation_max); - *output++ = (q7_t)sum_4; - - row_count--; - } - - row_count = output_ch & 0x3; - while (row_count) - { - q15_t *col_pos = col_buffer_start + row_shift; - const q7_t *row_pos = kernel + row_shift; - q31_t sum = *bias++; - const uint16_t col_count = (kernel_x * kernel_y); - row_shift += 1; - - for (int i = 0; i < col_count; i++) - { - sum += row_pos[i * input_ch] * col_pos[i * input_ch]; - } - sum = arm_nn_requantize(sum, *output_mult++, *output_shift++); - sum += output_offset; - sum = MAX(sum, output_activation_min); - sum = MIN(sum, output_activation_max); - *output++ = (q7_t)sum; - - row_count--; - } - - // clear counter and pointers - col_buffer = col_buffer_start; - } - } -#endif -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - return arm_depthwise_conv_s8(ctx, - dw_conv_params, - quant_params, - input_dims, - input, - filter_dims, - kernel, - bias_dims, - bias, - output_dims, - output); -#endif /* ARM_MATH_MVEI | ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims) -{ -#if defined(ARM_MATH_MVEI) - /* The + 4 accounts for out of bounds read of the lhs buffers in the *_nt_t_* functions. */ - return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t) + 4; -#elif defined(ARM_MATH_DSP) - return (input_dims->c * filter_dims->w * filter_dims->h) * sizeof(int16_t); -#else - (void)input_dims; - (void)filter_dims; - return 0; -#endif -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c deleted file mode 100644 index c9d0afc..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_u8_basic_ver1.c +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_u8_basic_ver1.c - * Description: u8 depthwise convolution function - * - * $Date: 09. October 2020 - * $Revision: V.1.1.1 - * - * Target : Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -static void depthwise_conv_u8_mult_4(const uint8_t *input, - const int32_t input_x, - const int32_t input_y, - const int32_t input_ch, - const uint8_t *kernel, - const int32_t output_ch, - const int32_t ch_mult, - const int32_t kernel_x, - const int32_t kernel_y, - const int32_t pad_x, - const int32_t pad_y, - const int32_t stride_x, - const int32_t stride_y, - const int32_t *bias, - uint8_t *output, - const int32_t output_shift, - const int32_t output_mult, - const int32_t output_x, - const int32_t output_y, - const int32_t output_offset, - const int32_t input_offset, - const int32_t filter_offset, - const int32_t output_activation_min, - const int32_t output_activation_max) -{ - for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h) - { - for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w) - { - for (int32_t in_ch = 0, out_ch = 0, ker_w_start = MAX(0, -in_w); out_ch < output_ch; - ++in_ch, out_ch += ch_mult) - { - for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4) - { - int32_t out_buff[4]; - - out_buff[0] = 0; - out_buff[1] = 0; - out_buff[2] = 0; - out_buff[3] = 0; - - for (int32_t ker_h = ker_h_start; ker_h < MIN(kernel_y, input_y - in_h); ++ker_h) - { - int32_t ker_idx = ker_h * (output_ch * kernel_x) + ker_w_start * output_ch + out_ch; - int32_t in_idx = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch; - - for (int32_t ker_w = ker_w_start; ker_w < MIN(kernel_x, input_x - in_w); - ++ker_w, ker_idx += output_ch) - { - int32_t in_val = input[in_idx + ker_w * input_ch] + input_offset; - out_buff[0] += in_val * (kernel[ker_idx + 0 + mult_tile] + filter_offset); - out_buff[1] += in_val * (kernel[ker_idx + 1 + mult_tile] + filter_offset); - out_buff[2] += in_val * (kernel[ker_idx + 2 + mult_tile] + filter_offset); - out_buff[3] += in_val * (kernel[ker_idx + 3 + mult_tile] + filter_offset); - } - } - - if (bias != NULL) - { - out_buff[0] += bias[out_ch + 0 + mult_tile]; - out_buff[1] += bias[out_ch + 1 + mult_tile]; - out_buff[2] += bias[out_ch + 2 + mult_tile]; - out_buff[3] += bias[out_ch + 3 + mult_tile]; - } - out_buff[0] = arm_nn_requantize(out_buff[0], output_mult, output_shift); - out_buff[1] = arm_nn_requantize(out_buff[1], output_mult, output_shift); - out_buff[2] = arm_nn_requantize(out_buff[2], output_mult, output_shift); - out_buff[3] = arm_nn_requantize(out_buff[3], output_mult, output_shift); - - out_buff[0] += output_offset; - out_buff[1] += output_offset; - out_buff[2] += output_offset; - out_buff[3] += output_offset; - - out_buff[0] = MIN(MAX(out_buff[0], output_activation_min), output_activation_max); - out_buff[1] = MIN(MAX(out_buff[1], output_activation_min), output_activation_max); - out_buff[2] = MIN(MAX(out_buff[2], output_activation_min), output_activation_max); - out_buff[3] = MIN(MAX(out_buff[3], output_activation_min), output_activation_max); - - output[out_idx++] = (uint8_t)out_buff[0]; - output[out_idx++] = (uint8_t)out_buff[1]; - output[out_idx++] = (uint8_t)out_buff[2]; - output[out_idx++] = (uint8_t)out_buff[3]; - } - } - } - } -} - -static void depthwise_conv_u8_generic(const uint8_t *input, - const int32_t input_x, - const int32_t input_y, - const int32_t input_ch, - const uint8_t *kernel, - const int32_t output_ch, - const int32_t ch_mult, - const int32_t kernel_x, - const int32_t kernel_y, - const int32_t pad_x, - const int32_t pad_y, - const int32_t stride_x, - const int32_t stride_y, - const int32_t *bias, - uint8_t *output, - const int32_t output_shift, - const int32_t output_mult, - const int32_t output_x, - const int32_t output_y, - const int32_t output_offset, - const int32_t input_offset, - const int32_t filter_offset, - const int32_t output_activation_min, - const int32_t output_activation_max) -{ - (void)output_ch; - int i_out = 0; - for (int i_out_y = 0; i_out_y < output_y; i_out_y++) - { - const int16_t base_idx_y = (i_out_y * stride_y) - pad_y; - for (int i_out_x = 0; i_out_x < output_x; i_out_x++) - { - const int16_t base_idx_x = (i_out_x * stride_x) - pad_x; - for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++) - { - for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++) - { - const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult; - int32_t acc_0; - /* Condition for kernel start dimension: (base_idx_ + ker__start) >= 0 */ - const int ker_y_start = MAX(0, -base_idx_y); - const int ker_x_start = MAX(0, -base_idx_x); - /* Condition for kernel end dimension: (base_idx_ + ker__end) < input_ */ - const int ker_y_end = MIN(kernel_y, input_y - base_idx_y); - const int ker_x_end = MIN(kernel_x, input_x - base_idx_x); - acc_0 = 0; - - for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++) - { - const int32_t idx_y = base_idx_y + i_ker_y; - for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++) - { - const int32_t idx_x = base_idx_x + i_ker_x; - int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch; - int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch; - - acc_0 += (input[idx_0] + input_offset) * (kernel[ker_idx_0] + filter_offset); - } - } - if (bias != NULL) - { - acc_0 += bias[idx_out_ch]; - } - - /* Requantize and clamp output to provided range */ - acc_0 = arm_nn_requantize(acc_0, output_mult, output_shift); - acc_0 += output_offset; - acc_0 = MAX(acc_0, output_activation_min); - acc_0 = MIN(acc_0, output_activation_max); - - output[i_out++] = acc_0; - } - } - } - } -} - -/** - * @brief uint8 depthwise convolution function with asymmetric quantization - * - * @param[in] input Pointer to input tensor - * @param[in] input_x Width of input tensor - * @param[in] input_y Height of input tensor - * @param[in] input_ch Channels in input tensor - * @param[in] kernel Pointer to kernel weights - * @param[in] kernel_x Width of kernel - * @param[in] kernel_y Height of kernel - * @param[in] ch_mult Number of channel multiplier - * @param[in] pad_x Padding sizes x - * @param[in] pad_y Padding sizes y - * @param[in] stride_x Convolution stride along the width - * @param[in] stride_y Convolution stride along the height - * @param[in] dilation_x Dilation along width. Not used and intended for future enhancement. - * @param[in] dilation_y Dilation along height. Not used and intended for future enhancement. - * @param[in] bias Pointer to optional bias values. If no bias is - * available, NULL is expected - * @param[in] input_offset Input tensor zero offset - * @param[in] filter_offset Kernel tensor zero offset - * @param[in] output_offset Output tensor zero offset - * @param[in,out] output Pointer to output tensor - * @param[in] output_x Width of output tensor - * @param[in] output_y Height of output tensor - * @param[in] output_activation_min Minimum value to clamp the output to. Range : {0, 255} - * @param[in] output_activation_max Minimum value to clamp the output to. Range : {0, 255} - * @param[in] output_shift Amount of right-shift for output - * @param[in] output_mult Output multiplier for requantization - * @return The function returns one of the following - * ARM_MATH_SIZE_MISMATCH - Not supported dimension of tensors - * ARM_MATH_SUCCESS - Successful operation - * ARM_MATH_ARGUMENT_ERROR - Implementation not available - * - * - */ - -arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input, - const uint16_t input_x, - const uint16_t input_y, - const uint16_t input_ch, - const uint8_t *kernel, - const uint16_t kernel_x, - const uint16_t kernel_y, - const int16_t ch_mult, - const int16_t pad_x, - const int16_t pad_y, - const int16_t stride_x, - const int16_t stride_y, - const int16_t dilation_x, - const int16_t dilation_y, - const int32_t *bias, - const int32_t input_offset, - const int32_t filter_offset, - const int32_t output_offset, - uint8_t *output, - const uint16_t output_x, - const uint16_t output_y, - const int32_t output_activation_min, - const int32_t output_activation_max, - const int32_t output_shift, - const int32_t output_mult) -{ - (void)dilation_x; - (void)dilation_y; - - if (ch_mult % 4 == 0) - { - depthwise_conv_u8_mult_4(input, - input_x, - input_y, - input_ch, - kernel, - ch_mult * input_ch, - ch_mult, - kernel_x, - kernel_y, - pad_x, - pad_y, - stride_x, - stride_y, - bias, - output, - output_shift, - output_mult, - output_x, - output_y, - output_offset, - input_offset, - filter_offset, - output_activation_min, - output_activation_max); - } - else - { - depthwise_conv_u8_generic(input, - input_x, - input_y, - input_ch, - kernel, - ch_mult * input_ch, - ch_mult, - kernel_x, - kernel_y, - pad_x, - pad_y, - stride_x, - stride_y, - bias, - output, - output_shift, - output_mult, - output_x, - output_y, - output_offset, - input_offset, - filter_offset, - output_activation_min, - output_activation_max); - } - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c deleted file mode 100644 index 23c8e46..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_conv_wrapper_s8.c +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_conv_wrapper_s8.c - * Description: Wrapper API to select appropriate depthwise conv API based - * on dimensions. - * - * $Date: 20. Dec 2021 - * $Revision: V.1.4.0 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/* - * s8 Depthwise conv wrapper function - * - * Refer header file for details. - * - */ -arm_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx, - const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *filter, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - arm_status status = ARM_MATH_SUCCESS; - if (1 == dw_conv_params->ch_mult && input_dims->n == 1 && dw_conv_params->dilation.w == 1 && - dw_conv_params->dilation.h == 1) - { -#if !defined(ARM_MATH_MVEI) - if ((filter_dims->w == 3) && (filter_dims->h == 3) && (dw_conv_params->padding.h <= 1) && - (dw_conv_params->padding.w <= 1)) - { - status = arm_depthwise_conv_3x3_s8(ctx, - dw_conv_params, - quant_params, - input_dims, - input, - filter_dims, - filter, - bias_dims, - bias, - output_dims, - output); - } - else -#endif - { - status = arm_depthwise_conv_s8_opt(ctx, - dw_conv_params, - quant_params, - input_dims, - input, - filter_dims, - filter, - bias_dims, - bias, - output_dims, - output); - } - } - else - { - status = arm_depthwise_conv_s8(ctx, - dw_conv_params, - quant_params, - input_dims, - input, - filter_dims, - filter, - bias_dims, - bias, - output_dims, - output); - } - - /* Return to application */ - return status; -} - -int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params, - const cmsis_nn_dims *input_dims, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims) -{ - (void)dw_conv_params; - int32_t size = 0; - - if (input_dims->c == output_dims->c && input_dims->n == 1 && dw_conv_params->dilation.w == 1 && - dw_conv_params->dilation.h == 1) - { - size = arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims); - } - - return size; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c deleted file mode 100644 index 729147f..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7.c +++ /dev/null @@ -1,422 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_separable_conv_HWC_q7.c - * Description: Q7 depthwise separable convolution function - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Q7 depthwise separable convolution function - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimension - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * @details - * - * Buffer size: - * - * bufferA size: 2*ch_im_in*dim_kernel*dim_kernel - * - * bufferB size: 0 - * - * Input dimension constraints: - * - * ch_im_in equals ch_im_out - * - * Implementation: - * There are 3 nested loop here: - * Inner loop: calculate each output value with MAC instruction over an accumulator - * Mid loop: loop over different output channel - * Outer loop: loop over different output (x, y) - */ - -arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out, - q15_t *bufferA, - q7_t *bufferB) -{ - (void)bufferB; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_out_y, i_out_x; - int16_t i_ker_y, i_ker_x; - q7_t *colBuffer = (q7_t *)bufferA; - q7_t *pBuffer = colBuffer; - const q7_t *pBias = bias; - q7_t *pOut = Im_out; - uint16_t rowCnt; - uint16_t row_shift; - - /* do some checking here, basically ch_im_in == ch_im_out */ - if (ch_im_in != ch_im_out) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - /* we first do im2col here */ - for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++) - { - for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in) - { - /* arm_fill_q7(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, ch_im_in); - } - else - { - /* arm_copy_q7((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in); - */ - memcpy(pBuffer, (q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - /* we will do the computation here for each channel */ - rowCnt = ch_im_out >> 2; - row_shift = 0; - pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = (dim_kernel * dim_kernel) >> 1; - q7_t *pB = colBuffer + row_shift; - const q7_t *pA = wt + row_shift; - row_shift += 4; - -#ifdef USE_INTRINSIC - -#ifndef ARM_MATH_BIG_ENDIAN - - while (colCnt) - { - q31_t inA1, inA2, inB1, inB2, opA, opB; - - inB1 = arm_nn_read_q7x4(pB); - pB += ch_im_in; - opB = arm_nn_read_q7x4(pB); - pB += ch_im_in; - inB2 = __PKHTB(opB, inB1, 16); - inB1 = __PKHBT(inB1, opB, 16); - inA1 = arm_nn_read_q7x4(pA); - pA += ch_im_in; - opB = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inA2 = __PKHTB(opB, inA1, 16); - inA1 = __PKHBT(inA1, opB, 16); - opA = __SXTB16(inA1); - opB = __SXTB16(inB1); - sum = __SMLAD(opA, opB, sum); - opA = __SXTB16(__ROR(inA1, 8)); - opB = __SXTB16(__ROR(inB1, 8)); - sum2 = __SMLAD(opA, opB, sum2); - opA = __SXTB16(inA2); - opB = __SXTB16(inB2); - sum3 = __SMLAD(opA, opB, sum3); - opA = __SXTB16(__ROR(inA2, 8)); - opB = __SXTB16(__ROR(inB2, 8)); - sum4 = __SMLAD(opA, opB, sum4); - colCnt--; - } -#else - - while (colCnt) - { - q31_t inA1, inA2, inB1, inB2, opA, opB; - - inB1 = arm_nn_read_q7x4(pB); - pB += ch_im_in; - opB = arm_nn_read_q7x4(pB); - pB += ch_im_in; - inB2 = __PKHBT(opB, inB1, 16); - inB1 = __PKHTB(inB1, opB, 16); - inA1 = arm_nn_read_q7x4(pA); - pA += ch_im_in; - opB = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inA2 = __PKHBT(opB, inA1, 16); - inA1 = __PKHTB(inA1, opB, 16); - opA = __SXTB16(inA1); - opB = __SXTB16(inB1); - sum2 = __SMLAD(opA, opB, sum2); - opA = __SXTB16(__ROR(inA1, 8)); - opB = __SXTB16(__ROR(inB1, 8)); - sum = __SMLAD(opA, opB, sum); - opA = __SXTB16(inA2); - opB = __SXTB16(inB2); - sum4 = __SMLAD(opA, opB, sum4); - opA = __SXTB16(__ROR(inA2, 8)); - opB = __SXTB16(__ROR(inB2, 8)); - sum3 = __SMLAD(opA, opB, sum3); - colCnt--; - } - -#endif /* ARM_MATH_BIG_ENDIAN */ - -#else - -#ifndef ARM_MATH_BIG_ENDIAN - /* - * r0 r1 r2 r3 r4 r5 - * inA1, inA2, inB1, inB2, opA, opB - */ - - asm volatile("COL_LOOP_%=:\n" - "ldr.w r2, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "ldr.w r5, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "pkhtb r3, r5, r2, ASR #16\n" - "pkhbt r2, r2, r5, LSL #16\n" - "ldr.w r0, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "ldr.w r5, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "pkhtb r1, r5, r0, ASR #16\n" - "pkhbt r0, r0, r5, LSL #16\n" - "sxtb16 r4, r0\n" - "sxtb16 r5, r2\n" - "smlad %[sum], r4, r5, %[sum]\n" - "mov.w r4, r0, ror #8\n" - "mov.w r5, r2, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum2], r4, r5, %[sum2]\n" - "sxtb16 r4, r1\n" - "sxtb16 r5, r3\n" - "smlad %[sum3], r4, r5, %[sum3]\n" - "mov.w r4, r1, ror #8\n" - "mov.w r5, r3, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum4], r4, r5, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n" - : [ sum ] "+r"(sum), - [ sum2 ] "+r"(sum2), - [ sum3 ] "+r"(sum3), - [ sum4 ] "+r"(sum4), - [ pB ] "+r"(pB), - [ pA ] "+r"(pA) - : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in) - : "r0", "r1", "r2", "r3", "r4", "r5"); -#else - /* - * r0 r1 r2 r3 r4 r5 - * inA1, inA2, inB1, inB2, opA, opB - */ - asm volatile("COL_LOOP_%=:\n" - "ldr.w r2, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "ldr.w r5, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "pkhbt r3, r5, r2, LSL #16\n" - "pkhtb r2, r2, r5, ASR #16\n" - "ldr.w r0, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "ldr.w r5, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "pkhbt r1, r5, r0, LSL #16\n" - "pkhtb r0, r0, r5, ASR #16\n" - "sxtb16 r4, r0\n" - "sxtb16 r5, r2\n" - "smlad %[sum2], r4, r5, %[sum2]\n" - "mov.w r4, r0, ror #8\n" - "mov.w r5, r2, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum], r4, r5, %[sum]\n" - "sxtb16 r4, r1\n" - "sxtb16 r5, r3\n" - "smlad %[sum4], r4, r5, %[sum4]\n" - "mov.w r4, r1, ror #8\n" - "mov.w r5, r3, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum3], r4, r5, %[sum3]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n" - : [ sum ] "+r"(sum), - [ sum2 ] "+r"(sum2), - [ sum3 ] "+r"(sum3), - [ sum4 ] "+r"(sum4), - [ pB ] "+r"(pB), - [ pA ] "+r"(pA) - : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in) - : "r0", "r1", "r2", "r3", "r4", "r5"); - -#endif /* ARM_MATH_BIG_ENDIAN */ - -#endif /* USE_INTRINSIC */ - - colCnt = (dim_kernel * dim_kernel) & 0x1; - while (colCnt) - { - union arm_nnword inA, inB; - inA.word = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inB.word = arm_nn_read_q7x4(pB); - pB += ch_im_in; - sum += inA.bytes[0] * inB.bytes[0]; - sum2 += inA.bytes[1] * inB.bytes[1]; - sum3 += inA.bytes[2] * inB.bytes[2]; - sum4 += inA.bytes[3] * inB.bytes[3]; - colCnt--; - } - - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - *pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8); - *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); - *pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8); - - rowCnt--; - } - - rowCnt = ch_im_out & 0x3; - while (rowCnt) - { - q7_t *pB = colBuffer + row_shift; - const q7_t *pA = wt + row_shift; - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = (dim_kernel * dim_kernel); - - row_shift += 1; - - while (colCnt) - { - q7_t A1 = *pA; - q7_t B1 = *pB; - pA += ch_im_in; - pB += ch_im_in; - sum += A1 * B1; - - colCnt--; - } - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - rowCnt--; - } - - /* clear counter and pointers */ - pBuffer = colBuffer; - } - } - -#else - (void)bufferA; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i_out_y, i_out_x, i_ch_out, i_ker_x, i_ker_y; - int conv_out; - - /* do some checking here, basically ch_im_in == ch_im_out */ - if (ch_im_in != ch_im_out) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++) - { - for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++) - { - // for each output - conv_out = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift); - for (i_ker_y = 0; i_ker_y < dim_kernel; i_ker_y++) - { - for (i_ker_x = 0; i_ker_x < dim_kernel; i_ker_x++) - { - int in_row = stride * i_out_y + i_ker_y - padding; - int in_col = stride * i_out_x + i_ker_x - padding; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in) - { - conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + i_ch_out] * - wt[(i_ker_y * dim_kernel + i_ker_x) * ch_im_out + i_ch_out]; - } - } - } - Im_out[(i_out_y * dim_im_out + i_out_x) * ch_im_out + i_ch_out] = - (q7_t)__SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c deleted file mode 100644 index 829acf9..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_depthwise_separable_conv_HWC_q7_nonsquare.c +++ /dev/null @@ -1,427 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_depthwise_separable_conv_HWC_q7_nonsquare.c - * Description: Q7 depthwise separable convolution function (non-square shape) - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup NNConv - * @{ - */ - -/** - * @brief Q7 depthwise separable convolution function (non-square shape) - * @param[in] Im_in pointer to input tensor - * @param[in] dim_im_in_x input tensor dimension x - * @param[in] dim_im_in_y input tensor dimension y - * @param[in] ch_im_in number of input tensor channels - * @param[in] wt pointer to kernel weights - * @param[in] ch_im_out number of filters, i.e., output tensor channels - * @param[in] dim_kernel_x filter kernel size x - * @param[in] dim_kernel_y filter kernel size y - * @param[in] padding_x padding sizes x - * @param[in] padding_y padding sizes y - * @param[in] stride_x convolution stride x - * @param[in] stride_y convolution stride y - * @param[in] bias pointer to bias - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in,out] Im_out pointer to output tensor - * @param[in] dim_im_out_x output tensor dimension x - * @param[in] dim_im_out_y output tensor dimension y - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] bufferB pointer to buffer space for output - * @return The function returns either - * ARM_MATH_SIZE_MISMATCH or ARM_MATH_SUCCESS based on the outcome of size checking. - * - * This function is the version with full list of optimization tricks, but with - * some constraints: - * ch_im_in is equal to ch_im_out - * - */ - -arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in, - const uint16_t dim_im_in_x, - const uint16_t dim_im_in_y, - const uint16_t ch_im_in, - const q7_t *wt, - const uint16_t ch_im_out, - const uint16_t dim_kernel_x, - const uint16_t dim_kernel_y, - const uint16_t padding_x, - const uint16_t padding_y, - const uint16_t stride_x, - const uint16_t stride_y, - const q7_t *bias, - const uint16_t bias_shift, - const uint16_t out_shift, - q7_t *Im_out, - const uint16_t dim_im_out_x, - const uint16_t dim_im_out_y, - q15_t *bufferA, - q7_t *bufferB) -{ - - (void)bufferB; - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - /* - * Implementation: - * There are 3 nested loop here: - * Inner loop: calculate each output value with MAC instruction over an accumulator - * Mid loop: loop over different output channel - * Outer loop: loop over different output (x, y) - * - */ - - int16_t i_out_y, i_out_x; - int16_t i_ker_y, i_ker_x; - q7_t *colBuffer = (q7_t *)bufferA; - q7_t *pBuffer = colBuffer; - const q7_t *pBias = bias; - q7_t *pOut = Im_out; - uint16_t rowCnt; - uint16_t row_shift; - - /* do some checking here, basically ch_im_in == ch_im_out */ - if (ch_im_in != ch_im_out) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - /* we first do im2col here */ - for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y; - i_ker_y++) - { - for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x; - i_ker_x++) - { - if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x) - { - /* arm_fill_q7(0, pBuffer, ch_im_in); */ - memset(pBuffer, 0, ch_im_in); - } - else - { - /* arm_copy_q7((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, - * ch_im_in); */ - memcpy(pBuffer, (q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, ch_im_in); - } - pBuffer += ch_im_in; - } - } - - /* we will do the computation here for each channel */ - rowCnt = ch_im_out >> 2; - row_shift = 0; - pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = (dim_kernel_x * dim_kernel_y) >> 1; - q7_t *pB = colBuffer + row_shift; - const q7_t *pA = wt + row_shift; - row_shift += 4; - -#ifdef USE_INTRINSIC - -#ifndef ARM_MATH_BIG_ENDIAN - - while (colCnt) - { - q31_t inA1, inA2, inB1, inB2, opA, opB; - - inB1 = arm_nn_read_q7x4(pB); - pB += ch_im_in; - opB = arm_nn_read_q7x4(pB); - pB += ch_im_in; - inB2 = __PKHTB(opB, inB1, 16); - inB1 = __PKHBT(inB1, opB, 16); - inA1 = arm_nn_read_q7x4(pA); - pA += ch_im_in; - opB = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inA2 = __PKHTB(opB, inA1, 16); - inA1 = __PKHBT(inA1, opB, 16); - opA = __SXTB16(inA1); - opB = __SXTB16(inB1); - sum = __SMLAD(opA, opB, sum); - opA = __SXTB16(__ROR(inA1, 8)); - opB = __SXTB16(__ROR(inB1, 8)); - sum2 = __SMLAD(opA, opB, sum2); - opA = __SXTB16(inA2); - opB = __SXTB16(inB2); - sum3 = __SMLAD(opA, opB, sum3); - opA = __SXTB16(__ROR(inA2, 8)); - opB = __SXTB16(__ROR(inB2, 8)); - sum4 = __SMLAD(opA, opB, sum4); - colCnt--; - } -#else - - while (colCnt) - { - q31_t inA1, inA2, inB1, inB2, opA, opB; - - inB1 = arm_nn_read_q7x4(pB); - pB += ch_im_in; - opB = arm_nn_read_q7x4(pB); - pB += ch_im_in; - inB2 = __PKHBT(opB, inB1, 16); - inB1 = __PKHTB(inB1, opB, 16); - inA1 = arm_nn_read_q7x4(pA); - pA += ch_im_in; - opB = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inA2 = __PKHBT(opB, inA1, 16); - inA1 = __PKHTB(inA1, opB, 16); - opA = __SXTB16(inA1); - opB = __SXTB16(inB1); - sum2 = __SMLAD(opA, opB, sum2); - opA = __SXTB16(__ROR(inA1, 8)); - opB = __SXTB16(__ROR(inB1, 8)); - sum = __SMLAD(opA, opB, sum); - opA = __SXTB16(inA2); - opB = __SXTB16(inB2); - sum4 = __SMLAD(opA, opB, sum4); - opA = __SXTB16(__ROR(inA2, 8)); - opB = __SXTB16(__ROR(inB2, 8)); - sum3 = __SMLAD(opA, opB, sum3); - colCnt--; - } - -#endif /* ARM_MATH_BIG_ENDIAN */ - -#else - -#ifndef ARM_MATH_BIG_ENDIAN - // r0 r1 r2 r3 r4 r5 - // inA1, inA2, inB1, inB2, opA, opB - asm volatile("COL_LOOP:\n" - "ldr.w r2, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "ldr.w r5, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "pkhtb r3, r5, r2, ASR #16\n" - "pkhbt r2, r2, r5, LSL #16\n" - "ldr.w r0, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "ldr.w r5, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "pkhtb r1, r5, r0, ASR #16\n" - "pkhbt r0, r0, r5, LSL #16\n" - "sxtb16 r4, r0\n" - "sxtb16 r5, r2\n" - "smlad %[sum], r4, r5, %[sum]\n" - "mov.w r4, r0, ror #8\n" - "mov.w r5, r2, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum2], r4, r5, %[sum2]\n" - "sxtb16 r4, r1\n" - "sxtb16 r5, r3\n" - "smlad %[sum3], r4, r5, %[sum3]\n" - "mov.w r4, r1, ror #8\n" - "mov.w r5, r3, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum4], r4, r5, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP\n" - : [ sum ] "+r"(sum), - [ sum2 ] "+r"(sum2), - [ sum3 ] "+r"(sum3), - [ sum4 ] "+r"(sum4), - [ pB ] "+r"(pB), - [ pA ] "+r"(pA) - : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in) - : "r0", "r1", "r2", "r3", "r4", "r5"); -#else - // r0 r1 r2 r3 r4 r5 - // inA1, inA2, inB1, inB2, opA, opB - asm volatile("COL_LOOP:\n" - "ldr.w r2, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "ldr.w r5, [%[pB], #0]\n" - "add.w %[pB], %[pB], %[ch_im_in]\n" - "pkhbt r3, r5, r2, LSL #16\n" - "pkhtb r2, r2, r5, ASR #16\n" - "ldr.w r0, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "ldr.w r5, [%[pA], #0]\n" - "add.w %[pA], %[pA], %[ch_im_in]\n" - "pkhbt r1, r5, r0, LSL #16\n" - "pkhtb r0, r0, r5, ASR #16\n" - "sxtb16 r4, r0\n" - "sxtb16 r5, r2\n" - "smlad %[sum2], r4, r5, %[sum2]\n" - "mov.w r4, r0, ror #8\n" - "mov.w r5, r2, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum], r4, r5, %[sum]\n" - "sxtb16 r4, r1\n" - "sxtb16 r5, r3\n" - "smlad %[sum4], r4, r5, %[sum4]\n" - "mov.w r4, r1, ror #8\n" - "mov.w r5, r3, ror #8\n" - "sxtb16 r4, r4\n" - "sxtb16 r5, r5\n" - "smlad %[sum3], r4, r5, %[sum3]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP\n" - : [ sum ] "+r"(sum), - [ sum2 ] "+r"(sum2), - [ sum3 ] "+r"(sum3), - [ sum4 ] "+r"(sum4), - [ pB ] "+r"(pB), - [ pA ] "+r"(pA) - : [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in) - : "r0", "r1", "r2", "r3", "r4", "r5"); -#endif /*ARM_MATH_BIG_ENDIAN */ - -#endif /* USE_INTRINSIC */ - - colCnt = (dim_kernel_x * dim_kernel_y) & 0x1; - while (colCnt) - { - union arm_nnword inA, inB; - inA.word = arm_nn_read_q7x4(pA); - pA += ch_im_in; - inB.word = arm_nn_read_q7x4(pB); - pB += ch_im_in; - sum += inA.bytes[0] * inB.bytes[0]; - sum2 += inA.bytes[1] * inB.bytes[1]; - sum3 += inA.bytes[2] * inB.bytes[2]; - sum4 += inA.bytes[3] * inB.bytes[3]; - colCnt--; - } - - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - *pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8); - *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); - *pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8); - - rowCnt--; - } - - rowCnt = ch_im_out & 0x3; - while (rowCnt) - { - q7_t *pB = colBuffer + row_shift; - const q7_t *pA = wt + row_shift; - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = (dim_kernel_x * dim_kernel_y); - - row_shift += 1; - - while (colCnt) - { - q7_t A1 = *pA; - q7_t B1 = *pB; - pA += ch_im_in; - pB += ch_im_in; - sum += A1 * B1; - - colCnt--; - } - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - rowCnt--; - } - - // clear counter and pointers - pBuffer = colBuffer; - } - } - -#else - (void)bufferA; - - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int i_out_y, i_out_x, i_ch_out; - int i_ker_y, i_ker_x; - - /* do some checking here, basically ch_im_in == ch_im_out */ - if (ch_im_in != ch_im_out) - { - return ARM_MATH_SIZE_MISMATCH; - } - - for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++) - { - for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++) - { - for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++) - { - // for each output - int conv_out = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift); - for (i_ker_y = 0; i_ker_y < dim_kernel_y; i_ker_y++) - { - for (i_ker_x = 0; i_ker_x < dim_kernel_x; i_ker_x++) - { - int in_row = stride_y * i_out_y + i_ker_y - padding_y; - int in_col = stride_x * i_out_x + i_ker_x - padding_x; - if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x) - { - conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + i_ch_out] * - wt[(i_ker_y * dim_kernel_x + i_ker_x) * ch_im_out + i_ch_out]; - } - } - } - Im_out[(i_out_y * dim_im_out_x + i_out_x) * ch_im_out + i_ch_out] = - (q7_t)__SSAT((conv_out >> out_shift), 8); - } - } - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNConv group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c deleted file mode 100644 index 481eeba..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_depthwise_conv_s8_core.c +++ /dev/null @@ -1,218 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_depthwise_conv_s8_core.c - * Description: Depthwise convolution on im2col buffers. - * - * $Date: 09. October 2020 - * $Revision: V.1.0.4 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/* - * Depthwise conv on an im2col buffer where the input channel equals - * output channel. - * - * Refer header file for details. - * - */ - -q7_t *arm_nn_depthwise_conv_s8_core(const q7_t *row, - const q15_t *col, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t kernel_size, - const int32_t *const output_bias, - q7_t *out) -{ -#if defined(ARM_MATH_MVEI) - int32_t ch_per_loop = num_ch / 4; - - const int32_t *bias = output_bias; - int8_t *out_tmp = out; - - int32_t idx = 0; - - while (ch_per_loop > 0) - { - int32x4_t ip_0; - int32x4_t ip_1; - int32_t ker_loop = kernel_size / 3; - int32x4_t out_0 = vldrwq_s32(bias); - int32x4_t out_1 = out_0; - bias += 4; - - const int32_t offset = idx * 4; - const int8_t *row_0 = row + offset; - const int16_t *col_0 = col + offset; - const int16_t *col_1 = col + kernel_size * num_ch + offset; - - int32x4_t ker_0 = vldrbq_s32(row_0); - - while (ker_loop > 0) - { - const int8_t *row_1 = row_0 + num_ch; - const int8_t *row_2 = row_0 + 2 * num_ch; - const int32x4_t ker_1 = vldrbq_s32(row_1); - const int32x4_t ker_2 = vldrbq_s32(row_2); - - ip_0 = vldrhq_s32(col_0); - ip_1 = vldrhq_s32(col_1); - col_0 += num_ch; - col_1 += num_ch; - - out_0 += vmulq_s32(ip_0, ker_0); - out_1 += vmulq_s32(ip_1, ker_0); - - ip_0 = vldrhq_s32(col_0); - ip_1 = vldrhq_s32(col_1); - col_0 += num_ch; - col_1 += num_ch; - - out_0 += vmulq_s32(ip_0, ker_1); - out_1 += vmulq_s32(ip_1, ker_1); - - ip_0 = vldrhq_s32(col_0); - ip_1 = vldrhq_s32(col_1); - col_0 += num_ch; - col_1 += num_ch; - - out_0 += vmulq_s32(ip_0, ker_2); - out_1 += vmulq_s32(ip_1, ker_2); - row_0 += 3 * num_ch; - - ker_0 = vldrbq_s32(row_0); - ker_loop--; - } - - idx++; - /* Handle tail kernel elements */ - ker_loop = kernel_size - ((kernel_size / 3) * 3); - while (ker_loop > 0) - { - ip_0 = vldrhq_s32(col_0); - ip_1 = vldrhq_s32(col_1); - - out_0 += vmulq_s32(ip_0, ker_0); - out_1 += vmulq_s32(ip_1, ker_0); - - col_0 += num_ch; - col_1 += num_ch; - - ip_0 = vldrhq_s32(col_0); - ip_1 = vldrhq_s32(col_1); - - row_0 += num_ch; - ker_0 = vldrbq_s32(row_0); - ker_loop--; - } - const int32x4_t mult = vldrwq_s32(out_mult); - const int32x4_t shift = vldrwq_s32(out_shift); - out_mult += 4; - out_shift += 4; - - out_0 = arm_requantize_mve_32x4(out_0, mult, shift); - out_1 = arm_requantize_mve_32x4(out_1, mult, shift); - - out_0 = vaddq_n_s32(out_0, out_offset); - out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); - out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max)); - vstrbq_s32(out_tmp, out_0); - - out_1 = vaddq_n_s32(out_1, out_offset); - out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min)); - out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max)); - vstrbq_s32(out_tmp + num_ch, out_1); - - out_tmp += 4; - ch_per_loop--; - } - - int32_t tail_ch = num_ch & 3; - if (tail_ch != 0) - { - int32_t ch_idx = (num_ch & ~3); - int32x4_t col_0_sum; - int32x4_t col_1_sum; - - const int32_t single_buffer_size = kernel_size * num_ch; - for (int i = 0; i < tail_ch; i++) - { - const int16_t *col_pos_0 = col + ch_idx; - const int16_t *col_pos_1 = col_pos_0 + single_buffer_size; - - const int8_t *row_pos = row + ch_idx; - int32_t sum_0 = bias[i]; - int32_t sum_1 = bias[i]; - - for (int j = 0; j < kernel_size; j++) - { - const int8_t row_val = row_pos[j * num_ch]; - sum_0 += row_val * col_pos_0[j * num_ch]; - sum_1 += row_val * col_pos_1[j * num_ch]; - } - col_0_sum[i] = sum_0; - col_1_sum[i] = sum_1; - - ch_idx++; - } - const mve_pred16_t p = vctp32q((uint32_t)tail_ch); - const int32x4_t mult = vldrwq_z_s32(out_mult, p); - const int32x4_t shift = vldrwq_z_s32(out_shift, p); - - col_0_sum = arm_requantize_mve_32x4(col_0_sum, mult, shift); - col_1_sum = arm_requantize_mve_32x4(col_1_sum, mult, shift); - - col_0_sum = vaddq_n_s32(col_0_sum, out_offset); - col_0_sum = vmaxq_s32(col_0_sum, vdupq_n_s32(activation_min)); - col_0_sum = vminq_s32(col_0_sum, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out_tmp, col_0_sum, p); - - col_1_sum = vaddq_n_s32(col_1_sum, out_offset); - col_1_sum = vmaxq_s32(col_1_sum, vdupq_n_s32(activation_min)); - col_1_sum = vminq_s32(col_1_sum, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out_tmp + num_ch, col_1_sum, p); - - out_tmp += tail_ch; - } - - return out_tmp + num_ch; -#else - (void)row; - (void)col; - (void)num_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)kernel_size; - (void)output_bias; - (void)out; - return NULL; -#endif -} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c deleted file mode 100644 index 05c95b6..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15.c +++ /dev/null @@ -1,186 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_kernel_q7_q15.c - * Description: Matrix-multiplication function for convolution - * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @brief Matrix-multiplication function for convolution. - * - * @details Refer to header file for details. - * - */ - -q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t *pA, - const q15_t *pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q7_t *pOut) -{ -#if defined(ARM_MATH_DSP) - /* set up the second output pointers */ - q7_t *pOut2 = pOut + ch_im_out; - const q7_t *pBias = bias; - - uint16_t rowCnt = ch_im_out >> 1; - /* this loop over rows in A */ - while (rowCnt) - { - /* setup pointers for B */ - const q15_t *pB = pInBuffer; - const q15_t *pB2 = pB + numCol_A; - - /* align the second pointer for A */ - const q7_t *pA2 = pA + numCol_A; - - /* init the sum with bias */ - q31_t sum = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = numCol_A >> 2; - /* accumulate over the vector */ - while (colCnt) - { - q31_t inA11, inA12, inA21, inA22; - - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - pA = read_and_pad(pA, &inA11, &inA12); - pA2 = read_and_pad(pA2, &inA21, &inA22); - - sum = __SMLAD(inA11, inB1, sum); - sum2 = __SMLAD(inA11, inB2, sum2); - sum3 = __SMLAD(inA21, inB1, sum3); - sum4 = __SMLAD(inA21, inB2, sum4); - - inB1 = arm_nn_read_q15x2_ia(&pB); - inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA12, inB1, sum); - sum2 = __SMLAD(inA12, inB2, sum2); - sum3 = __SMLAD(inA22, inB1, sum3); - sum4 = __SMLAD(inA22, inB2, sum4); - - colCnt--; - } /* while over colCnt */ - colCnt = numCol_A & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - q7_t inA2 = *pA2++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - sum3 += inA2 * inB1; - sum4 += inA2 * inB2; - colCnt--; - } /* while over colCnt */ - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); - *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); - *pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8); - - /* skip the row computed with A2 */ - pA += numCol_A; - rowCnt--; - } /* for over ch_im_out */ - - /* compute left-over row if any */ - if (ch_im_out & 0x1) - { - /* setup pointers for B */ - const q15_t *pB = pInBuffer; - const q15_t *pB2 = pB + numCol_A; - - /* load the bias */ - q31_t sum = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = numCol_A >> 2; - while (colCnt) - { - q31_t inA11, inA12; - - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - pA = read_and_pad(pA, &inA11, &inA12); - - sum = __SMLAD(inA11, inB1, sum); - sum2 = __SMLAD(inA11, inB2, sum2); - - inB1 = arm_nn_read_q15x2_ia(&pB); - inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA12, inB1, sum); - sum2 = __SMLAD(inA12, inB2, sum2); - - colCnt--; - } - colCnt = numCol_A & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - colCnt--; - } - - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); - } - - pOut += ch_im_out; - - /* return the new output pointer with offset */ - return pOut; -#else - (void)pA; - (void)pInBuffer; - (void)ch_im_out; - (void)numCol_A; - (void)bias_shift; - (void)out_shift; - (void)bias; - (void)pOut; - /* To be completed */ - return NULL; -#endif /* ARM_MATH_DSP */ -} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c deleted file mode 100644 index 0870ac3..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_q7_q15_reordered.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_kernel_q7_q15_reordered.c - * Description: Matrix-multiplication function for convolution with reordered columns - * - * $Date: January 26, 2021 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @brief Matrix-multiplication function for convolution with re-ordered input. - * - * @details Refer to header file for details. - * - */ - -q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t *pA, - const q15_t *pInBuffer, - const uint16_t ch_im_out, - const uint16_t numCol_A, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q7_t *pOut) -{ - -#if defined(ARM_MATH_DSP) - /* set up the second output pointers */ - q7_t *pOut2 = pOut + ch_im_out; - int i; - - /* this loop over rows in A */ - for (i = 0; i < ch_im_out; i += 2) - { - /* setup pointers for B */ - const q15_t *pB = pInBuffer; - const q15_t *pB2 = pB + numCol_A; - - /* align the second pointer for A */ - const q7_t *pA2 = pA + numCol_A; - - /* init the sum with bias */ - q31_t sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = numCol_A >> 2; - /* accumulate over the vector */ - while (colCnt) - { - q31_t inA11, inA12, inA21, inA22; - - q31_t inB1 = arm_nn_read_q15x2_ia(&pB); - q31_t inB2 = arm_nn_read_q15x2_ia(&pB2); - - pA = read_and_pad_reordered(pA, &inA11, &inA12); - pA2 = read_and_pad_reordered(pA2, &inA21, &inA22); - - sum = __SMLAD(inA11, inB1, sum); - sum2 = __SMLAD(inA11, inB2, sum2); - sum3 = __SMLAD(inA21, inB1, sum3); - sum4 = __SMLAD(inA21, inB2, sum4); - - inB1 = arm_nn_read_q15x2_ia(&pB); - inB2 = arm_nn_read_q15x2_ia(&pB2); - - sum = __SMLAD(inA12, inB1, sum); - sum2 = __SMLAD(inA12, inB2, sum2); - sum3 = __SMLAD(inA22, inB1, sum3); - sum4 = __SMLAD(inA22, inB2, sum4); - - colCnt--; - } /* while over colCnt */ - colCnt = numCol_A & 0x3; - while (colCnt) - { - q7_t inA1 = *pA++; - q15_t inB1 = *pB++; - q7_t inA2 = *pA2++; - q15_t inB2 = *pB2++; - - sum += inA1 * inB1; - sum2 += inA1 * inB2; - sum3 += inA2 * inB1; - sum4 += inA2 * inB2; - colCnt--; - } /* while over colCnt */ - *pOut++ = (q7_t)__SSAT((sum >> out_shift), 8); - *pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8); - *pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8); - *pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8); - - /* skip the row computed with A2 */ - pA += numCol_A; - } /* for over ch_im_out */ - - pOut += ch_im_out; - - /* return the new output pointer with offset */ - return pOut; -#else - (void)pA; - (void)pInBuffer; - (void)ch_im_out; - (void)numCol_A; - (void)bias_shift; - (void)out_shift; - (void)bias; - (void)pOut; - /* To be completed */ - return NULL; -#endif /* ARM_MATH_DSP */ -} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c deleted file mode 100644 index cb30068..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16.c +++ /dev/null @@ -1,245 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_kernel_s8_s16.c - * Description: Matrix-multiplication function for convolution - * - * $Date: 14. December 2021 - * $Revision: V.1.1.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/* - * Matrix-multiplication function for convolution with per-channel requantization. - * - * Refer header file for details. - * - */ - -q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a, - const q15_t *input_b, - const uint16_t output_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int16_t activation_min, - const int16_t activation_max, - const uint16_t num_col_a, - const int32_t *const output_bias, - q7_t *out_0) -{ -#if !defined(ARM_MATH_MVEI) - /* set up the second output pointers */ - q7_t *out_1 = out_0 + output_ch; - const int32_t *bias = output_bias; - - uint16_t row_count = output_ch / 2; - const q7_t *ip_a0 = input_a; - /* this loop over rows in A */ - while (row_count) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - /* align the second pointer for A */ - const q7_t *ip_a1 = ip_a0 + num_col_a; - - q31_t ch_0_out_0 = 0; - q31_t ch_0_out_1 = 0; - q31_t ch_1_out_0 = 0; - q31_t ch_1_out_1 = 0; - /* Init accumulator with bias for channel N and N + 1 */ - if (bias) - { - ch_0_out_0 = *bias; - ch_0_out_1 = *bias++; - ch_1_out_0 = *bias; - ch_1_out_1 = *bias++; - } - -#if defined(ARM_MATH_DSP) - uint16_t col_count = num_col_a / 4; - /* accumulate over the vector */ - while (col_count) - { - q31_t a01, a02, a11, a12; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad(ip_a0, &a01, &a02); - ip_a1 = read_and_pad(ip_a1, &a11, &a12); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a11, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a11, b1, ch_1_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a12, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a12, b1, ch_1_out_1); - - col_count--; - } /* while over col_count */ - col_count = num_col_a & 0x3; -#else - uint16_t col_count = num_col_a; -#endif - while (col_count) - { - q7_t a0 = *ip_a0++; - q15_t b0 = *ip_b0++; - q7_t a1 = *ip_a1++; - q15_t b1 = *ip_b1++; - - ch_0_out_0 += a0 * b0; - ch_0_out_1 += a0 * b1; - ch_1_out_0 += a1 * b0; - ch_1_out_1 += a1 * b1; - col_count--; - } /* while over col_count */ - - ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); - ch_0_out_0 += out_offset; - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - *out_0++ = (q7_t)ch_0_out_0; - - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - ch_0_out_1 += out_offset; - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = MIN(ch_0_out_1, activation_max); - *out_1++ = (q7_t)ch_0_out_1; - out_mult++; - out_shift++; - - ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift); - ch_1_out_0 += out_offset; - ch_1_out_0 = MAX(ch_1_out_0, activation_min); - ch_1_out_0 = MIN(ch_1_out_0, activation_max); - *out_0++ = (q7_t)ch_1_out_0; - - ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift); - ch_1_out_1 += out_offset; - ch_1_out_1 = MAX(ch_1_out_1, activation_min); - ch_1_out_1 = MIN(ch_1_out_1, activation_max); - *out_1++ = (q7_t)ch_1_out_1; - out_mult++; - out_shift++; - - /* skip row */ - ip_a0 += num_col_a; - row_count--; - } - - /* compute the last odd numbered row if any */ - if (output_ch & 0x1) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - q31_t ch_0_out_0 = 0; - q31_t ch_0_out_1 = 0; - - /* load the bias */ - if (bias) - { - ch_0_out_0 = *bias; - ch_0_out_1 = *bias++; - } - -#if defined(ARM_MATH_DSP) - uint16_t col_count = num_col_a >> 2; - while (col_count) - { - q31_t a01, a02; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad(ip_a0, &a01, &a02); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - - col_count--; - } - col_count = num_col_a & 0x3; -#else - uint16_t col_count = num_col_a; -#endif - while (col_count) - { - q7_t a0 = *ip_a0++; - q15_t b0 = *ip_b0++; - q15_t b1 = *ip_b1++; - - ch_0_out_0 += a0 * b0; - ch_0_out_1 += a0 * b1; - col_count--; - } - ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); - ch_0_out_0 += out_offset; - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - *out_0++ = (q7_t)ch_0_out_0; - - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - ch_0_out_1 += out_offset; - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = MIN(ch_0_out_1, activation_max); - *out_1++ = (q7_t)ch_0_out_1; - out_mult++; - out_shift++; - } - - out_0 += output_ch; - - /* return the new output pointer with offset */ - return out_0; -#else - (void)input_a; - (void)input_b; - (void)output_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)num_col_a; - (void)output_bias; - (void)out_0; - /* To be completed */ - return NULL; -#endif -} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c deleted file mode 100644 index 842a180..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_kernel_s8_s16_reordered.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_kernel_s8_s16_reordered.c - * Description: Matrix-multiplication function for convolution with reordered columns - * - * $Date: 09. October 2020 - * $Revision: V.1.0.3 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/* - * Matrix-multiplication with re-ordered input and bias inputs for convolution with per-channel - * requantization. The re-ordering is a consequence of sign extension is done by the SXTB16 command. - * - * Refer header file for details. This function differs from arm_nn_mat_mult_kernel_s8_s16(), in that it uses - * read_and_pad_reordered() instead of arm_nn_mat_mult_kernel_s8_s16(). Investigating the cycles impact and - * unifying these two functions is a potential future improvement. - * - */ - -q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a, - const q15_t *input_b, - const uint16_t output_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int16_t activation_min, - const int16_t activation_max, - const uint16_t num_col_a, - const int32_t *const output_bias, - q7_t *out_0) -{ -#if defined(ARM_MATH_DSP) - /* set up the second output pointers */ - q7_t *out_1 = out_0 + output_ch; - const int32_t *bias = output_bias; - - uint16_t row_count = output_ch / 2; - const q7_t *ip_a0 = input_a; - /* this loop over rows in A */ - while (row_count) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - /* align the second pointer for A */ - const q7_t *ip_a1 = ip_a0 + num_col_a; - - /* Init accumulator with bias for channel N and N + 1 */ - q31_t ch_0_out_0 = *bias; - q31_t ch_0_out_1 = *bias++; - q31_t ch_1_out_0 = *bias; - q31_t ch_1_out_1 = *bias++; - - uint16_t col_count = num_col_a / 4; - /* accumulate over the vector */ - while (col_count) - { - q31_t a01, a02, a11, a12; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02); - ip_a1 = read_and_pad_reordered(ip_a1, &a11, &a12); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a11, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a11, b1, ch_1_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a12, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a12, b1, ch_1_out_1); - - col_count--; - } /* while over col_count */ - - ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); - ch_0_out_0 += out_offset; - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - *out_0++ = (q7_t)ch_0_out_0; - - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - ch_0_out_1 += out_offset; - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = MIN(ch_0_out_1, activation_max); - *out_1++ = (q7_t)ch_0_out_1; - out_mult++; - out_shift++; - - ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift); - ch_1_out_0 += out_offset; - ch_1_out_0 = MAX(ch_1_out_0, activation_min); - ch_1_out_0 = MIN(ch_1_out_0, activation_max); - *out_0++ = (q7_t)ch_1_out_0; - - ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift); - ch_1_out_1 += out_offset; - ch_1_out_1 = MAX(ch_1_out_1, activation_min); - ch_1_out_1 = MIN(ch_1_out_1, activation_max); - *out_1++ = (q7_t)ch_1_out_1; - out_mult++; - out_shift++; - - /* skip row */ - ip_a0 += num_col_a; - row_count--; - } - - if (output_ch & 1) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - /* Init accumulator with bias for channel N + 1 */ - q31_t ch_0_out_0 = *bias; - q31_t ch_0_out_1 = ch_0_out_0; - - int32_t col_count = num_col_a / 4; - while (col_count) - { - q31_t a01, a02; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - - col_count--; - } /* while over col_count */ - - ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); - ch_0_out_0 += out_offset; - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - *out_0++ = (q7_t)ch_0_out_0; - - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - ch_0_out_1 += out_offset; - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = MIN(ch_0_out_1, activation_max); - *out_1++ = (q7_t)ch_0_out_1; - } - - out_0 += output_ch; - - /* return the new output pointer with offset */ - return out_0; -#else - (void)input_a; - (void)input_b; - (void)output_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)num_col_a; - (void)output_bias; - (void)out_0; - /* To be completed */ - return NULL; -#endif -} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c deleted file mode 100644 index adfa702..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ConvolutionFunctions/arm_nn_mat_mult_s8.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_s8.c - * Description: General Matrix-multiplication function - * - * $Date: 27. October 2021 - * $Revision: V.2.0.6 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/* - * s8 General matrix multiplication function with per-channel requantization for upto 4 column batches. - * - * Refer header file for details. - * - */ - -q7_t *arm_nn_mat_mult_s8(const q7_t *input_row, - const q7_t *input_col, - const uint16_t output_ch, - const uint16_t col_batches, - const int32_t *output_shift, - const int32_t *output_mult, - const int32_t out_offset, - const int32_t col_offset, - const int32_t row_offset, - const int16_t activation_min, - const int16_t activation_max, - const uint16_t row_len, - const int32_t *const bias, - q7_t *out) -{ -#if defined(ARM_MATH_MVEI) - (void)row_offset; - if (col_batches == 4) - { - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32_t row_len_tmp = row_len; - const int8_t *ip_r0 = input_row + (i_out_ch * row_len); - const int8_t *ip_c0 = input_col; - const int8_t *ip_c1 = input_col + row_len; - const int8_t *ip_c2 = input_col + (2 * row_len); - const int8_t *ip_c3 = input_col + (3 * row_len); - - int32_t acc_0 = 0; - int32_t acc_1 = 0; - int32_t acc_2 = 0; - int32_t acc_3 = 0; - const int32_t row_loop_cnt = (row_len + 7) / 8; - - for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++) - { - mve_pred16_t p = vctp16q((uint32_t)row_len_tmp); - const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p); - row_len_tmp -= 8; - - int16x8_t c0 = vldrbq_s16(ip_c0); - ip_c0 += 8; - c0 = vaddq_s16(c0, offset); - - int16x8_t c1 = vldrbq_s16(ip_c1); - ip_c1 += 8; - c1 = vaddq_s16(c1, offset); - - int16x8_t c2 = vldrbq_s16(ip_c2); - ip_c2 += 8; - c2 = vaddq_s16(c2, offset); - - int16x8_t c3 = vldrbq_s16(ip_c3); - ip_c3 += 8; - c3 = vaddq_s16(c3, offset); - - int16x8_t r0 = vldrbq_z_s16(ip_r0, p); - ip_r0 += 8; - - acc_0 = vmladavaq_p_s16(acc_0, r0, c0, p); - acc_1 = vmladavaq_p_s16(acc_1, r0, c1, p); - acc_2 = vmladavaq_p_s16(acc_2, r0, c2, p); - acc_3 = vmladavaq_p_s16(acc_3, r0, c3, p); - } - - int32x4_t res = {acc_0, acc_1, acc_2, acc_3}; - if (bias) - { - res = vaddq_n_s32(res, bias[i_out_ch]); - } - res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]); - res = vaddq_n_s32(res, out_offset); - - res = vmaxq_s32(res, vdupq_n_s32(activation_min)); - res = vminq_s32(res, vdupq_n_s32(activation_max)); - - const uint32x4_t scatter_offset = {0, output_ch, output_ch * 2, output_ch * 3}; - vstrbq_scatter_offset_s32(&out[i_out_ch], scatter_offset, res); - } - out += 4 * output_ch; - } - else - { - for (int i_col_batch = (col_batches & ~0x3); i_col_batch < (col_batches & 0x3); i_col_batch++) - { - for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++) - { - int32_t row_len_tmp = row_len; - - const int8_t *ip_r0 = input_row + (i_out_ch * row_len); - const int8_t *ip_c0 = input_col + (i_col_batch * row_len); - int32_t acc_0 = 0; - const int32_t row_loop_cnt = (row_len + 7) / 8; - - for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++) - { - const mve_pred16_t p = vctp16q((uint32_t)row_len_tmp); - const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p); - row_len_tmp -= 8; - - int16x8_t c0 = vldrbq_s16(ip_c0); - ip_c0 += 8; - c0 = vaddq_s16(c0, offset); - - int16x8_t r0 = vldrbq_z_s16(ip_r0, p); - ip_r0 += 8; - acc_0 = vmladavaq_p_s16(acc_0, r0, c0, p); - } - - if (bias) - { - acc_0 += bias[i_out_ch]; - } - acc_0 = arm_nn_requantize(acc_0, output_mult[i_out_ch], output_shift[i_out_ch]); - acc_0 += out_offset; - acc_0 = MAX(acc_0, activation_min); - acc_0 = MIN(acc_0, activation_max); - out[i_out_ch] = (q7_t)acc_0; - } - out += output_ch; - } - } - return out; - -#else - (void)input_row; - (void)input_col; - (void)output_ch; - (void)col_batches; - (void)output_shift; - (void)output_mult; - (void)out_offset; - (void)col_offset; - (void)row_offset; - (void)activation_min; - (void)activation_max; - (void)row_len; - (void)bias; - (void)out; - return NULL; -#endif -} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/CMakeLists.txt deleted file mode 100644 index cccd996..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -# -# Copyright (c) 2019-2021 Arm Limited. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -file(GLOB SRC "./*_s8.c") -target_sources(cmsis-nn PRIVATE ${SRC} arm_fully_connected_s16.c) - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c deleted file mode 100644 index 9eb02eb..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15.c +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_mat_q7_vec_q15.c - * Description: Mixed Q15-Q7 fully-connected layer function - * - * $Date: 20. July 2021 - * $Revision: V.1.1.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - -/** - * @brief Mixed Q15-Q7 fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * vec_buffer size: 0 - * - * Q7_Q15 version of the fully connected layer - * - * Weights are in q7_t and Activations are in q15_t - * - */ - -arm_status arm_fully_connected_mat_q7_vec_q15(const q15_t *pV, - const q7_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q15_t *pOut, - q15_t *vec_buffer) -{ - (void)vec_buffer; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q7_t *pB = pM; - const q7_t *pB2; - q15_t *pO = pOut; - const q7_t *pBias = bias; - const q15_t *pA = pV; - - uint16_t rowCnt = num_of_rows >> 1; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - pB2 = pB + dim_vec; - - while (colCnt) - { - q31_t inV, inM11, inM12, inM21, inM22; - pB = read_and_pad(pB, &inM11, &inM12); - pB2 = read_and_pad(pB2, &inM21, &inM22); - - inV = arm_nn_read_q15x2_ia(&pA); - - sum = __SMLAD(inV, inM11, sum); - sum2 = __SMLAD(inV, inM21, sum2); - - inV = arm_nn_read_q15x2_ia(&pA); - - sum = __SMLAD(inV, inM12, sum); - sum2 = __SMLAD(inV, inM22, sum2); - - colCnt--; - } - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - q7_t inM2 = *pB2++; - - sum += inV * inM; - sum2 += inV * inM2; - colCnt--; - } /* while over colCnt */ - *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); - *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); - - /*adjust the pointers and counters */ - pB += dim_vec; - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x1; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) - { - q31_t inV1, inV2, inM11, inM12; - - pB = read_and_pad(pB, &inM11, &inM12); - - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, inM11, sum); - - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM12, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); - - rowCnt--; - } - -#else - int i, j; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - for (i = 0; i < num_of_rows; i++) - { - int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (j = 0; j < dim_vec; j++) - { - ip_out += pV[j] * pM[i * dim_vec + j]; - } - pOut[i] = (q15_t)__SSAT((ip_out >> out_shift), 16); - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of FC group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c deleted file mode 100644 index a2da772..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_mat_q7_vec_q15_opt.c +++ /dev/null @@ -1,417 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_mat_q7_vec_q15_opt.c - * Description: Mixed Q15-Q7 opt fully-connected layer function - * - * $Date: 20. July 2021 - * $Revision: V.1.1.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - -/** - * @brief Mixed Q15-Q7 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * vec_buffer size: 0 - * - * Q7_Q15 version of the fully connected layer - * - * Weights are in q7_t and Activations are in q15_t - * - * Limitation: x4 version requires weight reordering to work - * - * Here we use only one pointer to read 4 rows in the weight - * matrix. So if the original q7_t matrix looks like this: - * - * | a11 | a12 | a13 | a14 | a15 | a16 | a17 | - * - * | a21 | a22 | a23 | a24 | a25 | a26 | a27 | - * - * | a31 | a32 | a33 | a34 | a35 | a36 | a37 | - * - * | a41 | a42 | a43 | a44 | a45 | a46 | a47 | - * - * | a51 | a52 | a53 | a54 | a55 | a56 | a57 | - * - * | a61 | a62 | a63 | a64 | a65 | a66 | a67 | - * - * We operates on multiple-of-4 rows, so the first four rows becomes - * - * | a11 | a21 | a12 | a22 | a31 | a41 | a32 | a42 | - * - * | a13 | a23 | a14 | a24 | a33 | a43 | a34 | a44 | - * - * | a15 | a25 | a16 | a26 | a35 | a45 | a36 | a46 | - * - * The column left over will be in-order. - * which is: - * | a17 | a27 | a37 | a47 | - * - * For the left-over rows, we do 1x1 computation, so the data remains - * as its original order. - * - * So the stored weight matrix looks like this: - * - * | a11 | a21 | a12 | a22 | a31 | a41 | - * - * | a32 | a42 | a13 | a23 | a14 | a24 | - * - * | a33 | a43 | a34 | a44 | a15 | a25 | - * - * | a16 | a26 | a35 | a45 | a36 | a46 | - * - * | a17 | a27 | a37 | a47 | a51 | a52 | - * - * | a53 | a54 | a55 | a56 | a57 | a61 | - * - * | a62 | a63 | a64 | a65 | a66 | a67 | - * - */ - -arm_status arm_fully_connected_mat_q7_vec_q15_opt(const q15_t *pV, - const q7_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q15_t *pOut, - q15_t *vec_buffer) -{ - - (void)vec_buffer; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q7_t *pB = pM; - q15_t *pO = pOut; - const q7_t *pBias = bias; - const q15_t *pA = pV; - - uint16_t rowCnt = num_of_rows >> 2; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 1; - - pA = pV; - -#ifdef USE_INTRINSIC - -#ifndef ARM_MATH_BIG_ENDIAN - - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM11, inV, sum); - sum2 = __SMLAD(inM12, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM13, inV, sum3); - sum4 = __SMLAD(inM14, inV, sum4); - colCnt--; - } - -#else - - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = *__SIMD32(pA)++; - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM12, inV, sum); - sum2 = __SMLAD(inM11, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM14, inV, sum3); - sum4 = __SMLAD(inM13, inV, sum4); - colCnt--; - } - -#endif /* ARM_MATH_BIG_ENDIAN */ - -#else - - /* - * register needed: - * loop counter: colCnt - * accumulators: sum, sum2, sum3, sum4 - * pointers: pB, pA - * weight data: inM11, inM12, inM13, inM14 - * activation data: inV - */ - -#ifndef ARM_MATH_BIG_ENDIAN - asm volatile("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #4\n" - "ldr.w r1, [%[pB]], #8\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r1, %[sum]\n" - "smlad %[sum2], r4, r0, %[sum2]\n" - "ldr.w r3, [%[pB], #-4]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r3, %[sum3]\n" - "smlad %[sum4], r4, r2, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n" - : [ sum ] "+r"(sum), - [ sum2 ] "+r"(sum2), - [ sum3 ] "+r"(sum3), - [ sum4 ] "+r"(sum4), - [ pB ] "+r"(pB), - [ pA ] "+r"(pA) - : [ colCnt ] "r"(colCnt) - : "r0", "r1", "r2", "r3", "r4"); -#else - asm volatile("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #4\n" - "ldr.w r1, [%[pB]], #8\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r0, %[sum]\n" - "smlad %[sum2], r4, r1, %[sum2]\n" - "ldr.w r3, [%[pB], #-4]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r2, %[sum3]\n" - "smlad %[sum4], r4, r3, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n" - : [ sum ] "+r"(sum), - [ sum2 ] "+r"(sum2), - [ sum3 ] "+r"(sum3), - [ sum4 ] "+r"(sum4), - [ pB ] "+r"(pB), - [ pA ] "+r"(pA) - : [ colCnt ] "r"(colCnt) - : "r0", "r1", "r2", "r3", "r4"); -#endif /* ARM_MATH_BIG_ENDIAN */ - -#endif /* USE_INTRINSIC */ - - colCnt = dim_vec & 0x1; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - q7_t inM2 = *pB++; - q7_t inM3 = *pB++; - q7_t inM4 = *pB++; - - sum += inV * inM; - sum2 += inV * inM2; - sum3 += inV * inM3; - sum4 += inV * inM4; - colCnt--; - } /* while over colCnt */ - *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); - *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); - *pO++ = (q15_t)(__SSAT((sum3 >> out_shift), 16)); - *pO++ = (q15_t)(__SSAT((sum4 >> out_shift), 16)); - - /* adjust the pointers and counters */ - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) - { - q31_t inV1, inV2, inM11, inM12; - - pB = read_and_pad(pB, &inM11, &inM12); - - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, inM11, sum); - - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM12, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); - - rowCnt--; - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - uint16_t rowCnt = num_of_rows >> 2; - const q7_t *pB = pM; - const q15_t *pA; - q15_t *pO = pOut; - const q7_t *pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 1; - - pA = pV; - - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inA2 = *pA++; - - q7_t inB1 = *pB++; - q7_t inB3 = *pB++; - q7_t inB2 = *pB++; - q7_t inB4 = *pB++; - - sum += inA1 * inB1 + inA2 * inB2; - sum2 += inA1 * inB3 + inA2 * inB4; - - inB1 = *pB++; - inB3 = *pB++; - inB2 = *pB++; - inB4 = *pB++; - - sum3 += inA1 * inB1 + inA2 * inB2; - sum4 += inA1 * inB3 + inA2 * inB4; - - colCnt--; - } - - colCnt = dim_vec & 0x1; - while (colCnt) - { - q15_t inA = *pA++; - q7_t inB = *pB++; - sum += inA * inB; - inB = *pB++; - sum2 += inA * inB; - inB = *pB++; - sum3 += inA * inB; - inB = *pB++; - sum4 += inA * inB; - - colCnt--; - } - *pO++ = (q15_t)__SSAT((sum >> out_shift), 16); - *pO++ = (q15_t)__SSAT((sum2 >> out_shift), 16); - *pO++ = (q15_t)__SSAT((sum3 >> out_shift), 16); - *pO++ = (q15_t)__SSAT((sum4 >> out_shift), 16); - - rowCnt--; - } - - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - int ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - int j; - - pA = pV; - for (j = 0; j < dim_vec; j++) - { - q15_t inA = *pA++; - q7_t inB = *pB++; - ip_out += inA * inB; - } - *pO++ = (q15_t)__SSAT((ip_out >> out_shift), 16); - - rowCnt--; - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of FC group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c deleted file mode 100644 index d8b6887..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15.c +++ /dev/null @@ -1,195 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_q15.c - * Description: Q15 basic fully-connected layer function - * - * $Date: 20. July 2021 - * $Revision: V.1.1.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - -/** - * @brief Q15 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * - * @details - * - * Buffer size: - * - * vec_buffer size: 0 - * - */ - -arm_status arm_fully_connected_q15(const q15_t *pV, - const q15_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t *bias, - q15_t *pOut, - q15_t *vec_buffer) -{ - (void)vec_buffer; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q15_t *pB = pM; - const q15_t *pB2 = pB + dim_vec; - q15_t *pO = pOut; - const q15_t *pA; - const q15_t *pBias = bias; - uint16_t rowCnt = num_of_rows >> 1; - - /* this loop loops over different output */ - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - pB2 = pB + dim_vec; - - while (colCnt) - { - q31_t inV1, inM1, inM2; - inV1 = arm_nn_read_q15x2_ia(&pA); - inM1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV1, inM1, sum); - inM2 = arm_nn_read_q15x2_ia(&pB2); - sum2 = __SMLAD(inV1, inM2, sum2); - - inV1 = arm_nn_read_q15x2_ia(&pA); - inM1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV1, inM1, sum); - inM2 = arm_nn_read_q15x2_ia(&pB2); - sum2 = __SMLAD(inV1, inM2, sum2); - - colCnt--; - } - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q15_t inM = *pB++; - q15_t inM2 = *pB2++; - - sum += inV * inM; - sum2 += inV * inM2; - colCnt--; - } /* while over colCnt */ - *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); - *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); - - /* adjust the pointers and counters */ - pB = pB + dim_vec; - rowCnt--; - } - - rowCnt = num_of_rows & 0x1; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) - { - q31_t inV1, inM1; - inV1 = arm_nn_read_q15x2_ia(&pA); - inM1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV1, inM1, sum); - - inV1 = arm_nn_read_q15x2_ia(&pA); - inM1 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV1, inM1, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q15_t inM = *pB++; - - sum += inV * inM; - - colCnt--; - } - - *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); - - rowCnt--; - } - -#else - int i, j; - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - for (i = 0; i < num_of_rows; i++) - { - int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (j = 0; j < dim_vec; j++) - { - ip_out += pV[j] * pM[i * dim_vec + j]; - } - pOut[i] = (q15_t)__SSAT((ip_out >> out_shift), 16); - } - -#endif /* ARM_MATH_DSP */ - - /* Return to application */ - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of FC group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c deleted file mode 100644 index f6c9b16..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q15_opt.c +++ /dev/null @@ -1,336 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_q15_opt.c - * Description: Q15 opt fully-connected layer function - * - * $Date: 20. July 2021 - * $Revision: V.1.1.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - -/** - * @brief Q15 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * - * @details - * - * Buffer size: - * - * vec_buffer size: 0 - * - * Here we use only one pointer to read 4 rows in the weight - * matrix. So if the original matrix looks like this: - * - * | a11 | a12 | a13 | - * - * | a21 | a22 | a23 | - * - * | a31 | a32 | a33 | - * - * | a41 | a42 | a43 | - * - * | a51 | a52 | a53 | - * - * | a61 | a62 | a63 | - * - * We operates on multiple-of-4 rows, so the first four rows becomes - * - * | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 | - * - * | a13 | a23 | a33 | a43 | - * - * Remaining rows are kept the same original order. - * - * So the stored weight matrix looks like this: - * - * - * | a11 | a12 | a21 | a22 | a31 | a32 | a41 | a42 | - * - * | a13 | a23 | a33 | a43 | a51 | a52 | a53 | a61 | - * - * | a62 | a63 | - */ - -arm_status arm_fully_connected_q15_opt(const q15_t *pV, - const q15_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q15_t *bias, - q15_t *pOut, - q15_t *vec_buffer) -{ - (void)vec_buffer; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q15_t *pB = pM; - q15_t *pO = pOut; - const q15_t *pBias = bias; - const q15_t *pA = pV; - - uint16_t rowCnt = num_of_rows >> 2; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 1; - - pA = pV; - -#ifdef USE_INTRINSIC - - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q15x2_ia(&pB); - sum = __SMLAD(inV, inM11, sum); - inM12 = arm_nn_read_q15x2_ia(&pB); - sum2 = __SMLAD(inV, inM12, sum2); - inM13 = arm_nn_read_q15x2_ia(&pB); - sum3 = __SMLAD(inV, inM13, sum3); - inM14 = arm_nn_read_q15x2_ia(&pB); - sum4 = __SMLAD(inV, inM14, sum4); - colCnt--; - } - -#else - - /* - * register needed: - * loop counter: colCnt - * accumulators: sum, sum2, sum3, sum4 - * pointers: pB, pA - * weight data: inM11, inM12, inM13, inM14 - * activation data: inV - */ - - asm volatile("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #4\n" - "ldr.w r0, [%[pB]], #16\n" - "smlad %[sum], r4, r0, %[sum]\n" - "ldr.w r1, [%[pB] , #-12]\n" - "smlad %[sum2], r4, r1, %[sum2]\n" - "ldr.w r2, [%[pB] , #-8]\n" - "smlad %[sum3], r4, r2, %[sum3]\n" - "ldr.w r3, [%[pB] , #-4]\n" - "smlad %[sum4], r4, r3, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n" - : [ sum ] "+r"(sum), - [ sum2 ] "+r"(sum2), - [ sum3 ] "+r"(sum3), - [ sum4 ] "+r"(sum4), - [ pB ] "+r"(pB), - [ pA ] "+r"(pA) - : [ colCnt ] "r"(colCnt) - : "r0", "r1", "r2", "r3", "r4"); - -#endif /* USE_INTRINSIC */ - - colCnt = dim_vec & 0x1; - while (colCnt) - { - - q15_t inV = *pA++; - q15_t inM = *pB++; - q15_t inM2 = *pB++; - q15_t inM3 = *pB++; - q15_t inM4 = *pB++; - - sum += inV * inM; - sum2 += inV * inM2; - sum3 += inV * inM3; - sum4 += inV * inM4; - colCnt--; - } /* while over colCnt */ - *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); - *pO++ = (q15_t)(__SSAT((sum2 >> out_shift), 16)); - *pO++ = (q15_t)(__SSAT((sum3 >> out_shift), 16)); - *pO++ = (q15_t)(__SSAT((sum4 >> out_shift), 16)); - - /* adjust the pointers and counters */ - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) - { - q31_t inV1, inV2, inM1, inM2; - - inM1 = arm_nn_read_q15x2_ia(&pB); - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, inM1, sum); - - inM2 = arm_nn_read_q15x2_ia(&pB); - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM2, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q15_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q15_t)(__SSAT((sum >> out_shift), 16)); - - rowCnt--; - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - uint16_t rowCnt = num_of_rows >> 2; - const q15_t *pB = pM; - const q15_t *pA; - q15_t *pO = pOut; - const q15_t *pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 1; - - pA = pV; - while (colCnt) - { - q15_t inA1 = *pA++; - q15_t inA2 = *pA++; - - q15_t inB1 = *pB++; - q15_t inB2 = *pB++; - sum += inA1 * inB1 + inA2 * inB2; - - inB1 = *pB++; - inB2 = *pB++; - sum2 += inA1 * inB1 + inA2 * inB2; - - inB1 = *pB++; - inB2 = *pB++; - sum3 += inA1 * inB1 + inA2 * inB2; - - inB1 = *pB++; - inB2 = *pB++; - sum4 += inA1 * inB1 + inA2 * inB2; - - colCnt--; - } - colCnt = dim_vec & 0x1; - while (colCnt) - { - q15_t inA = *pA++; - q15_t inB = *pB++; - sum += inA * inB; - inB = *pB++; - sum2 += inA * inB; - inB = *pB++; - sum3 += inA * inB; - inB = *pB++; - sum4 += inA * inB; - colCnt--; - } - *pO++ = (q15_t)__SSAT((sum >> out_shift), 16); - *pO++ = (q15_t)__SSAT((sum2 >> out_shift), 16); - *pO++ = (q15_t)__SSAT((sum3 >> out_shift), 16); - *pO++ = (q15_t)__SSAT((sum4 >> out_shift), 16); - - rowCnt--; - } - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - int ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - int j; - - pA = pV; - for (j = 0; j < dim_vec; j++) - { - q15_t inA = *pA++; - q15_t inB = *pB++; - ip_out += inA * inB; - } - *pO++ = (q15_t)__SSAT((ip_out >> out_shift), 16); - - rowCnt--; - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of FC group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c deleted file mode 100644 index d500efe..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7.c +++ /dev/null @@ -1,200 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_q7.c - * Description: Q7 basic fully-connected layer function - * - * $Date: July 20, 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - -/** - * @brief Q7 basic fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * vec_buffer size: dim_vec - * - * This basic function is designed to work with regular weight - * matrix without interleaving. - * - */ - -arm_status arm_fully_connected_q7(const q7_t *pV, - const q7_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q7_t *pOut, - q15_t *vec_buffer) -{ - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q7_t *pB = pM; - const q7_t *pB2; - q7_t *pO = pOut; - const q7_t *pBias = bias; - const q15_t *pA; - uint16_t rowCnt = num_of_rows >> 1; - - /* expand the vector into the buffer */ - arm_q7_to_q15_reordered_no_shift(pV, vec_buffer, dim_vec); - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 2; - - pA = vec_buffer; - pB2 = pB + dim_vec; - - while (colCnt) - { - q31_t inV, inM11, inM12, inM21, inM22; - pB = read_and_pad_reordered(pB, &inM11, &inM12); - pB2 = read_and_pad_reordered(pB2, &inM21, &inM22); - - inV = arm_nn_read_q15x2_ia(&pA); - - sum = __SMLAD(inV, inM11, sum); - sum2 = __SMLAD(inV, inM21, sum2); - - inV = arm_nn_read_q15x2_ia(&pA); - - sum = __SMLAD(inV, inM12, sum); - sum2 = __SMLAD(inV, inM22, sum2); - - colCnt--; - } - colCnt = dim_vec & 0x3; - while (colCnt) - { - q7_t inV = *pA++; - q15_t inM = *pB++; - q15_t inM2 = *pB2++; - - sum += inV * inM; - sum2 += inV * inM2; - colCnt--; - } /* while over colCnt */ - *pO++ = (q7_t)(__SSAT((sum >> out_shift), 8)); - *pO++ = (q7_t)(__SSAT((sum2 >> out_shift), 8)); - - /* adjust the pointers and counters */ - pB += dim_vec; - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x1; - - while (rowCnt) - { - uint16_t colCnt = dim_vec >> 2; - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - pA = vec_buffer; - - while (colCnt) - { - q31_t inV1, inV2, inM11, inM12; - - pB = read_and_pad_reordered(pB, &inM11, &inM12); - - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, inM11, sum); - - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM12, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q7_t inV = *pA++; - q15_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q7_t)(__SSAT((sum >> out_shift), 8)); - - rowCnt--; - } - -#else - (void)vec_buffer; - int i, j; - - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - for (i = 0; i < num_of_rows; i++) - { - int ip_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift); - for (j = 0; j < dim_vec; j++) - { - ip_out += pV[j] * pM[i * dim_vec + j]; - } - pOut[i] = (q7_t)__SSAT((ip_out >> out_shift), 8); - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of FC group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c deleted file mode 100644 index 2f3d653..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_q7_opt.c +++ /dev/null @@ -1,495 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_q7_opt.c - * Description: Q7 basic fully-connected layer function - * - * $Date: 20. July 2021 - * $Revision: V.1.1.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - -/** - * @brief Q7 opt fully-connected layer function - * @param[in] pV pointer to input vector - * @param[in] pM pointer to matrix weights - * @param[in] dim_vec length of the vector - * @param[in] num_of_rows number of rows in weight matrix - * @param[in] bias_shift amount of left-shift for bias - * @param[in] out_shift amount of right-shift for output - * @param[in] bias pointer to bias - * @param[in,out] pOut pointer to output vector - * @param[in,out] vec_buffer pointer to buffer space for input - * @return The function returns ARM_MATH_SUCCESS - * - * @details - * - * Buffer size: - * - * vec_buffer size: dim_vec - * - * This opt function is designed to work with interleaved weight - * matrix. The vector input is assumed in q7_t format, we call - * arm_q7_to_q15_no_shift_shuffle function to expand into - * q15_t format with certain weight re-ordering, refer to the function - * comments for more details. - * Here we use only one pointer to read 4 rows in the weight - * matrix. So if the original q7_t matrix looks like this: - * - * | a11 | a12 | a13 | a14 | a15 | a16 | a17 | - * - * | a21 | a22 | a23 | a24 | a25 | a26 | a27 | - * - * | a31 | a32 | a33 | a34 | a35 | a36 | a37 | - * - * | a41 | a42 | a43 | a44 | a45 | a46 | a47 | - * - * | a51 | a52 | a53 | a54 | a55 | a56 | a57 | - * - * | a61 | a62 | a63 | a64 | a65 | a66 | a67 | - * - * - * We operates on multiple-of-4 rows, so the first four rows becomes - * - * | a11 | a21 | a13 | a23 | a31 | a41 | a33 | a43 | - * - * | a12 | a22 | a14 | a24 | a32 | a42 | a34 | a44 | - * - * | a15 | a25 | a35 | a45 | a16 | a26 | a36 | a46 | - * - * So within the kernel, we first read the re-ordered vector in as: - * - * | b1 | b3 | and | b2 | b4 | - * - * the four q31_t weights will look like - * - * | a11 | a13 |, | a21 | a23 |, | a31 | a33 |, | a41 | a43 | - * - * | a12 | a14 |, | a22 | a24 |, | a32 | a34 |, | a42 | a44 | - * - * The column left over will be in-order. - * which is: - * - * | a17 | a27 | a37 | a47 | - * - * For the left-over rows, we do 1x1 computation, so the data remains - * as its original order. - * - * So the stored weight matrix looks like this: - * - * | a11 | a21 | a13 | a23 | a31 | a41 | - * - * | a33 | a43 | a12 | a22 | a14 | a24 | - * - * | a32 | a42 | a34 | a44 | a15 | a25 | - * - * | a35 | a45 | a16 | a26 | a36 | a46 | - * - * | a17 | a27 | a37 | a47 | a51 | a52 | - * - * | a53 | a54 | a55 | a56 | a57 | a61 | - * - * | a62 | a63 | a64 | a65 | a66 | a67 | - * - * - */ - -arm_status arm_fully_connected_q7_opt(const q7_t *pV, - const q7_t *pM, - const uint16_t dim_vec, - const uint16_t num_of_rows, - const uint16_t bias_shift, - const uint16_t out_shift, - const q7_t *bias, - q7_t *pOut, - q15_t *vec_buffer) -{ - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - const q7_t *pB = pM; - q7_t *pO = pOut; - const q7_t *pBias = bias; - const q15_t *pA; - uint16_t rowCnt = num_of_rows >> 2; - - arm_q7_to_q15_reordered_no_shift(pV, vec_buffer, dim_vec); - - while (rowCnt) - { - - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = vec_buffer; - -#ifdef USE_INTRINSIC - -#ifndef ARM_MATH_BIG_ENDIAN - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM11, inV, sum); - sum2 = __SMLAD(inM12, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM13, inV, sum3); - sum4 = __SMLAD(inM14, inV, sum4); - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM11, inV, sum); - sum2 = __SMLAD(inM12, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM13, inV, sum3); - sum4 = __SMLAD(inM14, inV, sum4); - colCnt--; - } -#else - while (colCnt) - { - q31_t inM11, inM12, inM13, inM14; - q31_t inV; - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM12, inV, sum); - sum2 = __SMLAD(inM11, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM14, inV, sum3); - sum4 = __SMLAD(inM13, inV, sum4); - - inV = arm_nn_read_q15x2_ia(&pA); - inM11 = arm_nn_read_q7x4_ia(&pB); - inM12 = __SXTB16(__ROR(inM11, 8)); - inM11 = __SXTB16(inM11); - sum = __SMLAD(inM12, inV, sum); - sum2 = __SMLAD(inM11, inV, sum2); - inM13 = arm_nn_read_q7x4_ia(&pB); - inM14 = __SXTB16(__ROR(inM13, 8)); - inM13 = __SXTB16(inM13); - sum3 = __SMLAD(inM14, inV, sum3); - sum4 = __SMLAD(inM13, inV, sum4); - colCnt--; - } -#endif /* ARM_MATH_BIG_ENDIAN */ - -#else - - /* - * register needed: - * loop counter: colCnt - * accumulators: sum, sum2, sum3, sum4 - * pointers: pB, pA - * weight data: inM11, inM12, inM13, inM14 - * activation data: inV - */ - -#ifndef ARM_MATH_BIG_ENDIAN - asm volatile("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #8\n" - "ldr.w r1, [%[pB]], #16\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r1, %[sum]\n" - "smlad %[sum2], r4, r0, %[sum2]\n" - "ldr.w r3, [%[pB], #-12]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r3, %[sum3]\n" - "smlad %[sum4], r4, r2, %[sum4]\n" - "ldr.w r4, [%[pA], #-4]\n" - "ldr.w r1, [%[pB], #-8]\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r1, %[sum]\n" - "smlad %[sum2], r4, r0, %[sum2]\n" - "ldr.w r3, [%[pB], #-4]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r3, %[sum3]\n" - "smlad %[sum4], r4, r2, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n" - : [ sum ] "+r"(sum), - [ sum2 ] "+r"(sum2), - [ sum3 ] "+r"(sum3), - [ sum4 ] "+r"(sum4), - [ pB ] "+r"(pB), - [ pA ] "+r"(pA) - : [ colCnt ] "r"(colCnt) - : "r0", "r1", "r2", "r3", "r4"); -#else - asm volatile("COL_LOOP_%=:\n" - "ldr.w r4, [%[pA]], #8\n" - "ldr.w r1, [%[pB]], #16\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r0, %[sum]\n" - "smlad %[sum2], r4, r1, %[sum2]\n" - "ldr.w r3, [%[pB], #-12]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r2, %[sum3]\n" - "smlad %[sum4], r4, r3, %[sum4]\n" - "ldr.w r4, [%[pA], #-4]\n" - "ldr.w r1, [%[pB], #-8]\n" - "mov.w r0, r1, ror #8\n" - "sxtb16 r0, r0\n" - "sxtb16 r1, r1\n" - "smlad %[sum], r4, r0, %[sum]\n" - "smlad %[sum2], r4, r1, %[sum2]\n" - "ldr.w r3, [%[pB], #-4]\n" - "mov.w r2, r3, ror #8\n" - "sxtb16 r2, r2\n" - "sxtb16 r3, r3\n" - "smlad %[sum3], r4, r2, %[sum3]\n" - "smlad %[sum4], r4, r3, %[sum4]\n" - "subs %[colCnt], #1\n" - "bne COL_LOOP_%=\n" - : [ sum ] "+r"(sum), - [ sum2 ] "+r"(sum2), - [ sum3 ] "+r"(sum3), - [ sum4 ] "+r"(sum4), - [ pB ] "+r"(pB), - [ pA ] "+r"(pA) - : [ colCnt ] "r"(colCnt) - : "r0", "r1", "r2", "r3", "r4"); -#endif /* ARM_MATH_BIG_ENDIAN */ - -#endif /* USE_INTRINSIC */ - - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - q7_t inM2 = *pB++; - q7_t inM3 = *pB++; - q7_t inM4 = *pB++; - - sum += inV * inM; - sum2 += inV * inM2; - sum3 += inV * inM3; - sum4 += inV * inM4; - colCnt--; - } /* while over colCnt */ - *pO++ = (q7_t)(__SSAT((sum >> out_shift), 8)); - *pO++ = (q7_t)(__SSAT((sum2 >> out_shift), 8)); - *pO++ = (q7_t)(__SSAT((sum3 >> out_shift), 8)); - *pO++ = (q7_t)(__SSAT((sum4 >> out_shift), 8)); - - /* adjust the pointers and counters */ - rowCnt--; - } - - /* left-over part of the rows */ - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - uint16_t colCnt = dim_vec >> 2; - - pA = vec_buffer; - - while (colCnt) - { - q31_t inV1, inV2, inM11, inM12; - - pB = read_and_pad_reordered(pB, &inM11, &inM12); - - inV1 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV1, inM11, sum); - - inV2 = arm_nn_read_q15x2_ia(&pA); - sum = __SMLAD(inV2, inM12, sum); - - colCnt--; - } - - /* left-over of the vector */ - colCnt = dim_vec & 0x3; - while (colCnt) - { - q15_t inV = *pA++; - q7_t inM = *pB++; - sum += inV * inM; - colCnt--; - } - - *pO++ = (q7_t)(__SSAT((sum >> out_shift), 8)); - - rowCnt--; - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - (void)vec_buffer; - uint16_t rowCnt = num_of_rows >> 2; - const q7_t *pB = pM; - const q7_t *pA; - q7_t *pO = pOut; - const q7_t *pBias = bias; - - while (rowCnt) - { - q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - uint16_t colCnt = dim_vec >> 2; - - pA = pV; - - while (colCnt) - { - q7_t inA1 = *pA++; - q7_t inA3 = *pA++; - q7_t inA2 = *pA++; - q7_t inA4 = *pA++; - - q7_t inB1 = *pB++; - q7_t inB3 = *pB++; - q7_t inB2 = *pB++; - q7_t inB4 = *pB++; - - sum += inA1 * inB1 + inA2 * inB2; - sum2 += inA1 * inB3 + inA2 * inB4; - - inB1 = *pB++; - inB3 = *pB++; - inB2 = *pB++; - inB4 = *pB++; - - sum3 += inA1 * inB1 + inA2 * inB2; - sum4 += inA1 * inB3 + inA2 * inB4; - - inB1 = *pB++; - inB3 = *pB++; - inB2 = *pB++; - inB4 = *pB++; - - sum += inA3 * inB1 + inA4 * inB2; - sum2 += inA3 * inB3 + inA4 * inB4; - - inB1 = *pB++; - inB3 = *pB++; - inB2 = *pB++; - inB4 = *pB++; - - sum3 += inA3 * inB1 + inA4 * inB2; - sum4 += inA3 * inB3 + inA4 * inB4; - - colCnt--; - } - colCnt = dim_vec & 0x3; - while (colCnt) - { - q7_t inA = *pA++; - q7_t inB = *pB++; - sum += inA * inB; - inB = *pB++; - sum2 += inA * inB; - inB = *pB++; - sum3 += inA * inB; - inB = *pB++; - sum4 += inA * inB; - - colCnt--; - } - *pO++ = (q7_t)__SSAT((sum >> out_shift), 8); - *pO++ = (q7_t)__SSAT((sum2 >> out_shift), 8); - *pO++ = (q7_t)__SSAT((sum3 >> out_shift), 8); - *pO++ = (q7_t)__SSAT((sum4 >> out_shift), 8); - - rowCnt--; - } - - rowCnt = num_of_rows & 0x3; - - while (rowCnt) - { - int ip_out = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift); - - int j; - - pA = pV; - for (j = 0; j < dim_vec; j++) - { - q7_t inA = *pA++; - q7_t inB = *pB++; - ip_out += inA * inB; - } - *pO++ = (q7_t)__SSAT((ip_out >> out_shift), 8); - - rowCnt--; - } - -#endif /* ARM_MATH_DSP */ - - /* Return to ARM_MATH_SUCCESS */ - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of FC group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c deleted file mode 100644 index 46df578..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s16.c +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_s16 - * Description: Fully connected function compatible with TF Lite. - * - * $Date: 3. August 2021 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - -/* - * S16 basic fully-connected and matrix multiplication layer function for TensorFlow Lite - * - * Refer header file for details. - * - */ -arm_status arm_fully_connected_s16(const cmsis_nn_context *ctx, - const cmsis_nn_fc_params *fc_params, - const cmsis_nn_per_tensor_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q15_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int64_t *bias, - const cmsis_nn_dims *output_dims, - q15_t *output) -{ - (void)bias_dims; - (void)ctx; - (void)fc_params->filter_offset; - - int32_t batch_cnt = input_dims->n; - - const q31_t reduced_multiplier = REDUCE_MULTIPLIER(quant_params->multiplier); - - while (batch_cnt) - { - arm_nn_vec_mat_mult_t_s16(input, - kernel, - bias, - output, - reduced_multiplier, - quant_params->shift, - filter_dims->n, /* col_dim or accum_depth */ - output_dims->c, /* row_dim or output_depth */ - fc_params->activation.min, - fc_params->activation.max); - input += filter_dims->n; - output += output_dims->c; - batch_cnt--; - } - - return (ARM_MATH_SUCCESS); -} - -int32_t arm_fully_connected_s16_get_buffer_size(const cmsis_nn_dims *filter_dims) -{ - (void)filter_dims; - return 0; -} - -/** - * @} end of FC group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c deleted file mode 100644 index 9615701..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/FullyConnectedFunctions/arm_fully_connected_s8.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Copyright (C) 2010-2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_fully_connected_s8 - * Description: Fully connected function compatible with TF Lite. - * - * $Date: 8 April 2022 - * $Revision: V.3.1.0 - * - * Target Processor: Cortex-M and Cortex-A cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup FC - * @{ - */ - -/* - * S8 basic fully-connected and matrix multiplication layer function for TensorFlow Lite - * - * Refer header file for details. - * - */ - -arm_status arm_fully_connected_s8(const cmsis_nn_context *ctx, - const cmsis_nn_fc_params *fc_params, - const cmsis_nn_per_tensor_quant_params *quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input, - const cmsis_nn_dims *filter_dims, - const q7_t *kernel, - const cmsis_nn_dims *bias_dims, - const int32_t *bias, - const cmsis_nn_dims *output_dims, - q7_t *output) -{ - (void)bias_dims; - (void)ctx; - (void)fc_params->filter_offset; - - int32_t batch_cnt = input_dims->n; - - while (batch_cnt) - { - arm_nn_vec_mat_mult_t_s8(input, - kernel, - bias, - output, - fc_params->input_offset, - 0, - fc_params->output_offset, - quant_params->multiplier, - quant_params->shift, - filter_dims->n, /* col_dim or accum_depth */ - output_dims->c, /* row_dim or output_depth */ - fc_params->activation.min, - fc_params->activation.max, - 1L); - input += filter_dims->n; - output += output_dims->c; - batch_cnt--; - } - return (ARM_MATH_SUCCESS); -} - -int32_t arm_fully_connected_s8_get_buffer_size(const cmsis_nn_dims *filter_dims) -{ - (void)filter_dims; - return 0; -} - -/** - * @} end of FC group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt deleted file mode 100644 index 0aa9f38..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/CMakeLists.txt +++ /dev/null @@ -1,26 +0,0 @@ -# -# Copyright (c) 2019-2022 Arm Limited. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -file(GLOB SRC "./*_s8.c") -target_sources(cmsis-nn PRIVATE ${SRC} arm_q7_to_q15_with_offset.c - arm_nn_mat_mul_kernel_s16.c - arm_q7_to_q15_with_offset.c - arm_nn_mat_mul_kernel_s16.c - arm_nn_vec_mat_mult_t_s16.c - arm_q7_to_q15_no_shift.c) - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c deleted file mode 100644 index c3f666a..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_accumulate_q7_to_q15.c +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_accumulate_q7_to_q15.c - * Description: Accumulate q7 vector into q15 one. - * - * $Date: 20 July 2021 - * $Revision: V.1.1.2 - * - * pSrc Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -void arm_nn_accumulate_q7_to_q15(q15_t *pDst, const q7_t *pSrc, uint32_t length) -{ - q15_t *pCnt = pDst; - const q7_t *pV = pSrc; - int32_t count = length; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - q31_t v1, v2, vo1, vo2; - count = length >> 2; - q31_t in; - - while (count > 0l) - { - q31_t value = arm_nn_read_q7x4_ia(&pV); - v1 = __SXTB16(__ROR((uint32_t)value, 8)); - v2 = __SXTB16(value); -#ifndef ARM_MATH_BIG_ENDIAN - vo2 = (q31_t)__PKHTB(v1, v2, 16); - vo1 = (q31_t)__PKHBT(v2, v1, 16); -#else - vo1 = (q31_t)__PKHTB(v1, v2, 16); - vo2 = (q31_t)__PKHBT(v2, v1, 16); -#endif - - in = arm_nn_read_q15x2(pCnt); - arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo1, in)); - - in = arm_nn_read_q15x2(pCnt); - arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo2, in)); - - count--; - } - count = length & 0x3; -#endif - while (count > 0l) - { - *pCnt++ += *pV++; - count--; - } -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c deleted file mode 100644 index 511e586..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_add_q7.c +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_add_q7.c - * Description: Non saturating addition of elements of a q7 vector. - * - * $Date: 20. July 2021 - * $Revision: V.1.1.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nn_tables.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -void arm_nn_add_q7(const q7_t *input, q31_t *output, uint32_t block_size) -{ - uint32_t block_count; - q31_t result = 0; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Loop unrolling: Compute 4 outputs at a time */ - block_count = block_size >> 2U; - - while (block_count > 0U) - { - const int32_t mult_q15x2 = (1UL << 16) | 1UL; - q31_t in_q7x4 = arm_nn_read_q7x4_ia(&input); - q31_t temp_q15x2 = __SXTAB16(__SXTB16(in_q7x4), __ROR((uint32_t)in_q7x4, 8)); - - result = __SMLAD(temp_q15x2, mult_q15x2, result); - - /* Decrement loop counter */ - block_count--; - } - - /* Loop unrolling: Compute remaining outputs */ - block_count = block_size & 0x3; -#else - block_count = block_size; -#endif - while (block_count > 0U) - { - /* Add and store result in destination buffer. */ - result += *input++; - - /* Decrement loop counter */ - block_count--; - } - - *output = result; -} - -/** - * @} end of NNBasicMath group - */ \ No newline at end of file diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c deleted file mode 100644 index b633ef4..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_padded_s8.c +++ /dev/null @@ -1,168 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_depthwise_conv_nt_t_padded_s8.c - * Description: Depthwise convolution with padded matrices. - * - * $Date: 09. October 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M processors with MVE extension - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * Depthwise convolution of transposed rhs matrix with 4 lhs matrices. One or more of the rhs matrices are padded. - * Dimensions are the same for lhs and rhs. - * - * Refer header file for details. - * - */ - -q7_t *arm_nn_depthwise_conv_nt_t_padded_s8(const q7_t *lhs, - const q7_t *rhs, - const int32_t input_offset, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t row_x_col, - const int32_t *const output_bias, - q7_t *out) -{ -#if defined(ARM_MATH_MVEI) - int32_t loop_count = (num_ch + 3) / 4; - const int32_t *bias = output_bias; - uint32_t num_ch_to_process = num_ch; - - for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; - num_ch_to_process -= 4, out += 4, offset += 4, i_loop_cnt++) - { - int32x4_t out_0 = vldrwq_s32(bias); - int32x4_t out_1 = out_0; - int32x4_t out_2 = out_0; - int32x4_t out_3 = out_0; - bias += 4; - - const int8_t *rhs_0 = rhs + offset; - const int8_t *lhs_0 = lhs + offset; - const int8_t *lhs_1 = lhs + row_x_col * num_ch + offset; - const int8_t *lhs_2 = lhs + (row_x_col * num_ch * 2) + offset; - const int8_t *lhs_3 = lhs + (row_x_col * num_ch * 3) + offset; - - for (int i_row_x_col = 0; i_row_x_col < row_x_col; i_row_x_col++) - { - const int32x4_t ker_0 = vldrbq_s32(rhs_0); - - int32x4_t ip_0 = vldrbq_s32(lhs_0); - ip_0 = vaddq_n_s32(ip_0, input_offset); - out_0 += vmulq_s32(ip_0, ker_0); - - int32x4_t ip_1 = vldrbq_s32(lhs_1); - ip_1 = vaddq_n_s32(ip_1, input_offset); - out_1 += vmulq_s32(ip_1, ker_0); - - int32x4_t ip_2 = vldrbq_s32(lhs_2); - ip_2 = vaddq_n_s32(ip_2, input_offset); - out_2 += vmulq_s32(ip_2, ker_0); - - int32x4_t ip_3 = vldrbq_s32(lhs_3); - ip_3 = vaddq_n_s32(ip_3, input_offset); - - out_3 += vmulq_s32(ip_3, ker_0); - - lhs_0 += num_ch; - lhs_1 += num_ch; - lhs_2 += num_ch; - lhs_3 += num_ch; - - rhs_0 += num_ch; - } - - const int32x4_t mult = vldrwq_s32(out_mult); - const int32x4_t shift = vldrwq_s32(out_shift); - out_mult += 4; - out_shift += 4; - - out_0 = arm_requantize_mve_32x4(out_0, mult, shift); - out_0 = vaddq_n_s32(out_0, out_offset); - out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); - out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max)); - mve_pred16_t p = vctp32q(num_ch_to_process); - vstrbq_p_s32(out, out_0, p); - - out_1 = arm_requantize_mve_32x4(out_1, mult, shift); - out_1 = vaddq_n_s32(out_1, out_offset); - out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min)); - out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + num_ch, out_1, p); - - out_2 = arm_requantize_mve_32x4(out_2, mult, shift); - out_2 = vaddq_n_s32(out_2, out_offset); - out_2 = vmaxq_s32(out_2, vdupq_n_s32(activation_min)); - out_2 = vminq_s32(out_2, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + 2 * num_ch, out_2, p); - - out_3 = arm_requantize_mve_32x4(out_3, mult, shift); - out_3 = vaddq_n_s32(out_3, out_offset); - out_3 = vmaxq_s32(out_3, vdupq_n_s32(activation_min)); - out_3 = vminq_s32(out_3, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + 3 * num_ch, out_3, p); - } - - const int tail_ch = num_ch & 0x3; - if (tail_ch != 0) - { - out -= (4 - tail_ch); - } - return out + (3 * num_ch); - -#else - (void)lhs; - (void)rhs; - (void)input_offset; - (void)num_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)row_x_col; - (void)output_bias; - (void)out; - return NULL; -#endif -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c deleted file mode 100644 index dda12fd..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_depthwise_conv_nt_t_s8.c +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_depthwise_conv_nt_t_s8.c - * Description: Depthwise convolution on matrices with no padding. - * - * $Date: 09. October 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M processors with MVE extension. - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * Depthwise convolution of rhs matrix with 4 lhs matrices with no padding. Dimensions are the same for lhs and rhs. - * - * Refer header file for details. - * - */ - -q7_t *arm_nn_depthwise_conv_nt_t_s8(const q7_t *lhs, - const q7_t *rhs, - const int32_t input_offset, - const uint16_t num_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int32_t out_offset, - const int32_t activation_min, - const int32_t activation_max, - const uint16_t row_x_col, - const int32_t *const output_bias, - q7_t *out) -{ -#if defined(ARM_MATH_MVEI) - const int32_t *bias = output_bias; - int32_t loop_count = (num_ch + 3) / 4; - uint32_t num_ch_to_process = num_ch; - - for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; - num_ch_to_process -= 4, offset += 4, out += 4, i_loop_cnt++) - { - int32x4_t out_0 = vldrwq_s32(bias); - int32x4_t out_1 = out_0; - int32x4_t out_2 = out_0; - int32x4_t out_3 = out_0; - bias += 4; - - const int8_t *rhs_0 = rhs + offset; - const int8_t *lhs_0 = lhs + offset; - const int8_t *lhs_1 = lhs + row_x_col * num_ch + offset; - const int8_t *lhs_2 = lhs + (row_x_col * num_ch * 2) + offset; - const int8_t *lhs_3 = lhs + (row_x_col * num_ch * 3) + offset; - int32x4_t ker_sum = vdupq_n_s32(0); - - for (int i_row_x_col = 0; i_row_x_col < row_x_col; i_row_x_col++) - { - const int32x4_t ker_0 = vldrbq_s32(rhs_0); - ker_sum = vaddq_s32(ker_sum, ker_0); - - int32x4_t ip_0 = vldrbq_s32(lhs_0); - out_0 += vmulq_s32(ip_0, ker_0); - - int32x4_t ip_1 = vldrbq_s32(lhs_1); - out_1 += vmulq_s32(ip_1, ker_0); - - int32x4_t ip_2 = vldrbq_s32(lhs_2); - out_2 += vmulq_s32(ip_2, ker_0); - - int32x4_t ip_3 = vldrbq_s32(lhs_3); - out_3 += vmulq_s32(ip_3, ker_0); - - lhs_0 += num_ch; - lhs_1 += num_ch; - lhs_2 += num_ch; - lhs_3 += num_ch; - - rhs_0 += num_ch; - } - - ker_sum = vmulq_n_s32(ker_sum, input_offset); - out_0 = ker_sum + out_0; - out_1 = ker_sum + out_1; - out_2 = ker_sum + out_2; - out_3 = ker_sum + out_3; - - const int32x4_t mult = vldrwq_s32(out_mult); - const int32x4_t shift = vldrwq_s32(out_shift); - out_mult += 4; - out_shift += 4; - mve_pred16_t p = vctp32q(num_ch_to_process); - - out_0 = arm_requantize_mve_32x4(out_0, mult, shift); - out_0 = vaddq_n_s32(out_0, out_offset); - out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min)); - out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out, out_0, p); - - out_1 = arm_requantize_mve_32x4(out_1, mult, shift); - out_1 = vaddq_n_s32(out_1, out_offset); - out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min)); - out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + num_ch, out_1, p); - - out_2 = arm_requantize_mve_32x4(out_2, mult, shift); - out_2 = vaddq_n_s32(out_2, out_offset); - out_2 = vmaxq_s32(out_2, vdupq_n_s32(activation_min)); - out_2 = vminq_s32(out_2, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + 2 * num_ch, out_2, p); - - out_3 = arm_requantize_mve_32x4(out_3, mult, shift); - out_3 = vaddq_n_s32(out_3, out_offset); - out_3 = vmaxq_s32(out_3, vdupq_n_s32(activation_min)); - out_3 = vminq_s32(out_3, vdupq_n_s32(activation_max)); - vstrbq_p_s32(out + 3 * num_ch, out_3, p); - } - - const int tail_ch = num_ch & 0x3; - if (tail_ch != 0) - { - out -= (4 - tail_ch); - } - - return out + (3 * num_ch); -#else - (void)lhs; - (void)rhs; - (void)input_offset; - (void)num_ch; - (void)out_shift; - (void)out_mult; - (void)out_offset; - (void)activation_min; - (void)activation_max; - (void)row_x_col; - (void)output_bias; - (void)out; - return NULL; -#endif -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c deleted file mode 100644 index 8b1bf6e..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_1x_s8.c +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (C) 2010-2022 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mul_core_1x_s8.c - * Description: General Matrix-multiplication function - * - * $Date: 19. April 2022 - * $Revision: V.1.0.3 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s8 matrix multiplication to process 1 row - * - * Refer header file for details. - * - */ - -arm_status arm_nn_mat_mul_core_1x_s8(int32_t row_elements, - const int8_t *row_base, - const int8_t *col_base, - int32_t *const sum_col, - int32_t *const output) -{ - int32_t acc_n0 = 0; - int32_t sum_tmp = 0; - -#if defined(ARM_MATH_MVEI) && !defined(ARM_MATH_AUTOVECTORIZE) - - __ASM volatile(" vldrb.8 q0, [%[col]], #16 \n" - " wlstp.8 lr, %[cnt], 1f \n" - "2: \n" - " vaddva.s8 %[sum], q0 \n" - " vldrb.8 q1, [%[row0]], #16 \n" - " vmladava.s8 %[out0], q0, q1 \n" - " vldrb.8 q0, [%[col]], #16 \n" - " letp lr, 2b \n" - "1: \n" - : [col] "+r"(col_base), [sum] "+Te"(sum_tmp), [row0] "+r"(row_base), [out0] "+Te"(acc_n0) - : [cnt] "r"(row_elements) - : "q0", "q1", "memory", "r14"); -#else - for (int i = 0; i < row_elements; i++) - { - sum_tmp += col_base[i]; - acc_n0 += row_base[i] * col_base[i]; - } -#endif - - *sum_col = sum_tmp; - *output = acc_n0; - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c deleted file mode 100644 index ff427ad..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_core_4x_s8.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2010-2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mul_core_4x_s8.c - * Description: General matrix multiplication function for MVE extension - * - * $Date: 19. April 2022 - * $Revision: V.3.0.1 - * - * Target Processor: Cortex-M processors - * -------------------------------------------------------------------- */ -#include "arm_nn_types.h" -#include "arm_nnsupportfunctions.h" -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s8 matrix multiplication to process 4 rows and one column - * - * Refer header file for details. - * - */ - -int8_t *arm_nn_mat_mul_core_4x_s8(const int32_t row_elements, - const int32_t offset, - const int8_t *row_base, - const int8_t *col_base_ref, - const int32_t out_ch, - const cmsis_nn_conv_params *conv_params, - const cmsis_nn_per_channel_quant_params *quant_params, - const int32_t *bias, - int8_t *output) -{ - -#if defined(ARM_MATH_MVEI) - for (int i = 0; i < out_ch; i++) - { - int32_t acc_n0 = 0; - int32_t acc_n1 = 0; - int32_t acc_n2 = 0; - int32_t acc_n3 = 0; - - const int8_t *ip_row_0 = row_base; - const int8_t *ip_row_1 = row_base + offset; - const int8_t *ip_row_2 = row_base + (2 * offset); - const int8_t *ip_row_3 = row_base + (3 * offset); - const int8_t *col_base = col_base_ref + i * row_elements; - int32_t sum_tmp = 0; - - __ASM volatile(" vldrb.8 q0, [%[col]], #16 \n" - " wlstp.8 lr, %[cnt], 1f \n" - "2: \n" - " vaddva.s8 %[sum], q0 \n" - " vldrb.8 q1, [%[row0]], #16 \n" - " vmladava.s8 %[out0], q0, q1 \n" - " vldrb.8 q2, [%[row1]], #16 \n" - " vmladava.s8 %[out1], q0, q2 \n" - " vldrb.8 q3, [%[row2]], #16 \n" - " vmladava.s8 %[out2], q0, q3 \n" - " vldrb.8 q4, [%[row3]], #16 \n" - " vmladava.s8 %[out3], q0, q4 \n" - " vldrb.8 q0, [%[col]], #16 \n" - " letp lr, 2b \n" - "1: \n" - : [col] "+r"(col_base), - [sum] "+Te"(sum_tmp), - [row0] "+r"(ip_row_0), - [row1] "+r"(ip_row_1), - [row2] "+r"(ip_row_2), - [row3] "+r"(ip_row_3), - [out0] "+Te"(acc_n0), - [out1] "+Te"(acc_n1), - [out2] "+Te"(acc_n2), - [out3] "+Te"(acc_n3) - : [cnt] "r"(row_elements) - : "q0", "q1", "q2", "q3", "q4", "memory", "r14"); - - int32x4_t res = {acc_n0, acc_n1, acc_n2, acc_n3}; - sum_tmp *= conv_params->input_offset; - if (bias) - { - sum_tmp += bias[i]; - } - res = vaddq_n_s32(res, sum_tmp); - - res = arm_requantize_mve(res, quant_params->multiplier[i], quant_params->shift[i]); - res = vaddq_n_s32(res, conv_params->output_offset); - - res = vmaxq_s32(res, vdupq_n_s32(conv_params->activation.min)); - res = vminq_s32(res, vdupq_n_s32(conv_params->activation.max)); - - const uint32x4_t scatter_offset = {0, (uint32_t)out_ch, (uint32_t)out_ch * 2, (uint32_t)out_ch * 3}; - vstrbq_scatter_offset_s32(output, scatter_offset, res); - output++; - } - - return output + (3 * out_ch); -#else - (void)row_elements; - (void)offset; - (void)row_base; - (void)col_base_ref; - (void)out_ch; - (void)conv_params; - (void)quant_params; - (void)bias; - (void)output; - return NULL; -#endif -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c deleted file mode 100644 index 41d0bc9..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mul_kernel_s16.c +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_kernel_s16.c - * Description: Matrix-multiplication function for convolution - * - * $Date: 12 August 2021 - * $Revision: V.1.1.0 - * - * Target Processor: Cortex-M cores - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/* - * Matrix-multiplication function for convolution with per-channel requantization. - * - * Refer header file for details. - * - */ - -q15_t *arm_nn_mat_mult_kernel_s16(const q7_t *input_a, - const q15_t *input_b, - const int32_t output_ch, - const int32_t *out_shift, - const int32_t *out_mult, - const int16_t activation_min, - const int16_t activation_max, - const int32_t num_col_a, - const int64_t *const output_bias, - q15_t *out_0) -{ - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* set up the second output pointers */ - q15_t *out_1 = out_0 + output_ch; - const int64_t *bias = output_bias; - uint16_t row_count = output_ch / 2; - const q7_t *ip_a0 = input_a; - - /* this loop over rows in A */ - while (row_count) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - /* align the second pointer for A */ - const q7_t *ip_a1 = ip_a0 + num_col_a; - - /* Init accumulator for channel N and N + 1 */ - q31_t ch_0_out_0 = 0; - q31_t ch_0_out_1 = 0; - q31_t ch_1_out_0 = 0; - q31_t ch_1_out_1 = 0; - - uint16_t col_count = num_col_a / 4; - /* accumulate over the vector */ - while (col_count) - { - q31_t a01, a02, a11, a12; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad(ip_a0, &a01, &a02); - ip_a1 = read_and_pad(ip_a1, &a11, &a12); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a11, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a11, b1, ch_1_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - ch_1_out_0 = __SMLAD(a12, b0, ch_1_out_0); - ch_1_out_1 = __SMLAD(a12, b1, ch_1_out_1); - - col_count--; - } /* while over col_count */ - col_count = num_col_a & 0x3; - while (col_count) - { - q7_t a0 = *ip_a0++; - q15_t b0 = *ip_b0++; - q7_t a1 = *ip_a1++; - q15_t b1 = *ip_b1++; - - ch_0_out_0 += a0 * b0; - ch_0_out_1 += a0 * b1; - ch_1_out_0 += a1 * b0; - ch_1_out_1 += a1 * b1; - col_count--; - } /* while over col_count */ - if (bias) - { - q31_t reduced_multiplier = REDUCE_MULTIPLIER(*out_mult); - q63_t acc_64 = ch_0_out_0 + *bias; - ch_0_out_0 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift); - acc_64 = ch_0_out_1 + *bias++; - ch_0_out_1 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift); - out_mult++; - } - else - { - ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - out_mult++; - } - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - *out_0++ = (q15_t)ch_0_out_0; - - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = MIN(ch_0_out_1, activation_max); - *out_1++ = (q15_t)ch_0_out_1; - out_shift++; - - if (bias) - { - q31_t reduced_multiplier = REDUCE_MULTIPLIER(*out_mult); - q63_t acc_64 = ch_1_out_0 + *bias; - ch_1_out_0 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift); - acc_64 = ch_1_out_1 + *bias++; - ch_1_out_1 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift); - out_mult++; - } - else - { - ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift); - ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift); - out_mult++; - } - ch_1_out_0 = MAX(ch_1_out_0, activation_min); - ch_1_out_0 = MIN(ch_1_out_0, activation_max); - *out_0++ = (q15_t)ch_1_out_0; - - ch_1_out_1 = MAX(ch_1_out_1, activation_min); - ch_1_out_1 = MIN(ch_1_out_1, activation_max); - *out_1++ = (q15_t)ch_1_out_1; - out_shift++; - - /* skip row */ - ip_a0 += num_col_a; - row_count--; - } - - /* compute the last odd numbered row if any */ - if (output_ch & 0x1) - { - /* setup pointers for B */ - const q15_t *ip_b0 = input_b; - const q15_t *ip_b1 = ip_b0 + num_col_a; - - q31_t ch_0_out_0 = 0; - q31_t ch_0_out_1 = 0; - - uint16_t col_count = num_col_a >> 2; - while (col_count) - { - q31_t a01, a02; - q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0); - q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1); - - ip_a0 = read_and_pad(ip_a0, &a01, &a02); - - ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1); - - b0 = arm_nn_read_q15x2_ia(&ip_b0); - b1 = arm_nn_read_q15x2_ia(&ip_b1); - ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0); - ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1); - - col_count--; - } - col_count = num_col_a & 0x3; - while (col_count) - { - q7_t a0 = *ip_a0++; - q15_t b0 = *ip_b0++; - q15_t b1 = *ip_b1++; - - ch_0_out_0 += a0 * b0; - ch_0_out_1 += a0 * b1; - col_count--; - } - if (bias) - { - q31_t reduced_multiplier = REDUCE_MULTIPLIER(*out_mult); - q63_t acc_64 = ch_0_out_0 + *bias; - ch_0_out_0 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift); - acc_64 = ch_0_out_1 + *bias++; - ch_0_out_1 = arm_nn_requantize_s64(acc_64, reduced_multiplier, *out_shift); - } - else - { - ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift); - ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift); - } - ch_0_out_0 = MAX(ch_0_out_0, activation_min); - ch_0_out_0 = MIN(ch_0_out_0, activation_max); - *out_0++ = (q15_t)ch_0_out_0; - - ch_0_out_1 = MAX(ch_0_out_1, activation_min); - ch_0_out_1 = MIN(ch_0_out_1, activation_max); - *out_1++ = (q15_t)ch_0_out_1; - out_mult++; - out_shift++; - } - - out_0 += output_ch; - - /* return the new output pointer with offset */ - return out_0; -#else - (void)input_a; - (void)input_b; - (void)output_ch; - (void)out_shift; - (void)out_mult; - (void)activation_min; - (void)activation_max; - (void)num_col_a; - (void)output_bias; - (void)out_0; - /* To be completed */ - return NULL; -#endif -} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c deleted file mode 100644 index d0420c2..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mat_mult_nt_t_s8.c +++ /dev/null @@ -1,582 +0,0 @@ -/* - * Copyright (C) 2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mat_mult_s8_nt_t_s8 - * Description: Matrix multiplication support function with the right-hand-side (rhs) matrix transposed - * - * $Date: 09. October 2020 - * $Revision: V.1.0.3 - * - * Target Processor: Cortex-M - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s8 matrix multiplication with the right-hand-side matrix transposed - * - * Refer header file for details. - * - */ -arm_status arm_nn_mat_mult_nt_t_s8(const q7_t *lhs, - const q7_t *rhs, - const q31_t *bias, - q7_t *dst, - const int32_t *dst_multipliers, - const int32_t *dst_shifts, - const int32_t lhs_rows, - const int32_t rhs_rows, - const int32_t rhs_cols, - const int32_t lhs_offset, - const int32_t dst_offset, - const int32_t activation_min, - const int32_t activation_max) -{ -#if defined(ARM_MATH_DSP) - const int32_t off0 = rhs_cols - 4; - - for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2) - { - const q7_t *lhs_ptr = &lhs[0]; - q7_t *dst_ptr = &dst[0]; - - q31_t lhs_offset_contribution0 = 0; - q31_t lhs_offset_contribution1 = 0; - - for (int32_t x = 0; x < rhs_cols; ++x) - { - lhs_offset_contribution0 += rhs[x]; - lhs_offset_contribution1 += rhs[x + rhs_cols]; - } - - lhs_offset_contribution0 *= lhs_offset; - lhs_offset_contribution1 *= lhs_offset; - if (bias) - { - lhs_offset_contribution0 += bias[rhs_rows_idx]; - lhs_offset_contribution1 += bias[rhs_rows_idx + 1]; - } - - int32_t lhs_rows_idx = lhs_rows >> 1; - - while (lhs_rows_idx) - { - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = lhs_offset_contribution0; - q31_t res01 = lhs_offset_contribution1; - q31_t res10 = lhs_offset_contribution0; - q31_t res11 = lhs_offset_contribution1; - - int32_t rhs_cols_idx = 0; - - q31_t val0, val1, val2, val3, val4, val5; - - for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16) - { - val1 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val2 = __SXTB16(val1); - val0 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val4 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val1 = __SXTB16_RORn(val1, 8); - val0 = __SXTB16_RORn(val0, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTB16(val4); - res00 = __SMLAD(val0, val1, res00); - val4 = __SXTB16_RORn(val4, 8); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val0, val4, res01); - - // 4 x MAC res10, res11 - val0 = arm_nn_read_q7x4((const q7_t *)&lhs_ptr[off0]); - val3 = __SXTB16(val0); - val0 = __SXTB16_RORn(val0, 8); - res10 = __SMLAD(val3, val2, res10); - res11 = __SMLAD(val3, val5, res11); - res10 = __SMLAD(val0, val1, res10); - val1 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - res11 = __SMLAD(val0, val4, res11); - - val4 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = __SXTB16(val1); - val0 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val1 = __SXTB16_RORn(val1, 8); - val0 = __SXTB16_RORn(val0, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTB16(val4); - res00 = __SMLAD(val0, val1, res00); - val4 = __SXTB16_RORn(val4, 8); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val0, val4, res01); - - // 4 x MAC res10, res11 - val0 = arm_nn_read_q7x4((const q7_t *)&lhs_ptr[off0]); - val3 = __SXTB16(val0); - val0 = __SXTB16_RORn(val0, 8); - res10 = __SMLAD(val3, val2, res10); - res11 = __SMLAD(val3, val5, res11); - res10 = __SMLAD(val0, val1, res10); - val1 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - res11 = __SMLAD(val0, val4, res11); - - val4 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = __SXTB16(val1); - val0 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val1 = __SXTB16_RORn(val1, 8); - val0 = __SXTB16_RORn(val0, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTB16(val4); - res00 = __SMLAD(val0, val1, res00); - val4 = __SXTB16_RORn(val4, 8); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val0, val4, res01); - - // 4 x MAC res10, res11 - val0 = arm_nn_read_q7x4((const q7_t *)&lhs_ptr[off0]); - val3 = __SXTB16(val0); - val0 = __SXTB16_RORn(val0, 8); - res10 = __SMLAD(val3, val2, res10); - res11 = __SMLAD(val3, val5, res11); - res10 = __SMLAD(val0, val1, res10); - val1 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - res11 = __SMLAD(val0, val4, res11); - - val4 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = __SXTB16(val1); - val0 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val1 = __SXTB16_RORn(val1, 8); - val0 = __SXTB16_RORn(val0, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val3, val2, res00); - val5 = __SXTB16(val4); - res00 = __SMLAD(val0, val1, res00); - val4 = __SXTB16_RORn(val4, 8); - res01 = __SMLAD(val3, val5, res01); - res01 = __SMLAD(val0, val4, res01); - - // 4 x MAC res10, res11 - val0 = arm_nn_read_q7x4((const q7_t *)&lhs_ptr[off0]); - val3 = __SXTB16(val0); - val0 = __SXTB16_RORn(val0, 8); - res10 = __SMLAD(val3, val2, res10); - res11 = __SMLAD(val3, val5, res11); - res10 = __SMLAD(val0, val1, res10); - res11 = __SMLAD(val0, val4, res11); - } - - for (; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q7_t rhs_value0 = rhs_ptr[0]; - q7_t rhs_value1 = rhs_ptr[rhs_cols]; - q7_t lhs_value = lhs_ptr[0]; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - lhs_value = lhs_ptr[rhs_cols]; - res10 += lhs_value * rhs_value0; - res11 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res01 = arm_nn_requantize(res01, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - res10 = arm_nn_requantize(res10, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res11 = arm_nn_requantize(res11, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - res10 += dst_offset; - res11 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - res10 = MAX(res10, activation_min); - res10 = MIN(res10, activation_max); - res11 = MAX(res11, activation_min); - res11 = MIN(res11, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr[1] = (q7_t)res01; - dst_ptr += rhs_rows; - dst_ptr[0] = (q7_t)res10; - dst_ptr[1] = (q7_t)res11; - dst_ptr += rhs_rows; - - lhs_ptr += rhs_cols; - - lhs_rows_idx--; - } - - // Left-over rows - if (lhs_rows % 2) - { - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = lhs_offset_contribution0; - q31_t res01 = lhs_offset_contribution1; - - int32_t rhs_cols_idx = 0; - - q31_t val0, val1, val2, val3, val4, val5; - for (; rhs_cols_idx <= (rhs_cols - 16); rhs_cols_idx += 16) - { - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val5 = __SXTB16(val2); - val4 = __SXTB16(val1); - val0 = __SXTB16_RORn(val0, 8); - val2 = __SXTB16_RORn(val2, 8); - val1 = __SXTB16_RORn(val1, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val5, val3, res00); - res00 = __SMLAD(val2, val0, res00); - res01 = __SMLAD(val5, val4, res01); - res01 = __SMLAD(val2, val1, res01); - - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val5 = __SXTB16(val2); - val4 = __SXTB16(val1); - val0 = __SXTB16_RORn(val0, 8); - val2 = __SXTB16_RORn(val2, 8); - val1 = __SXTB16_RORn(val1, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val5, val3, res00); - res00 = __SMLAD(val2, val0, res00); - res01 = __SMLAD(val5, val4, res01); - res01 = __SMLAD(val2, val1, res01); - - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val5 = __SXTB16(val2); - val4 = __SXTB16(val1); - val0 = __SXTB16_RORn(val0, 8); - val2 = __SXTB16_RORn(val2, 8); - val1 = __SXTB16_RORn(val1, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val5, val3, res00); - res00 = __SMLAD(val2, val0, res00); - res01 = __SMLAD(val5, val4, res01); - res01 = __SMLAD(val2, val1, res01); - - val0 = arm_nn_read_q7x4_ia((const q7_t **)&rhs_ptr); - val1 = arm_nn_read_q7x4((const q7_t *)&rhs_ptr[off0]); - val2 = arm_nn_read_q7x4_ia((const q7_t **)&lhs_ptr); - val3 = __SXTB16(val0); - val5 = __SXTB16(val2); - val4 = __SXTB16(val1); - val0 = __SXTB16_RORn(val0, 8); - val2 = __SXTB16_RORn(val2, 8); - val1 = __SXTB16_RORn(val1, 8); - - // 4 x MAC res00, res01 - res00 = __SMLAD(val5, val3, res00); - res00 = __SMLAD(val2, val0, res00); - res01 = __SMLAD(val5, val4, res01); - res01 = __SMLAD(val2, val1, res01); - } - - // Left-over accumulations - for (; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q7_t rhs_value0 = rhs_ptr[0]; - q7_t rhs_value1 = rhs_ptr[rhs_cols]; - q7_t lhs_value = lhs_ptr[0]; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res01 = arm_nn_requantize(res01, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr[1] = (q7_t)res01; - } - - rhs += 2 * rhs_cols; - dst += 2; - } - - if (rhs_rows % 2) - { - const q7_t *lhs_ptr = &lhs[0]; - q7_t *dst_ptr = &dst[0]; - - for (int32_t lhs_rows_idx = 0; lhs_rows_idx < lhs_rows; ++lhs_rows_idx) - { - const q7_t *rhs_ptr = &rhs[0]; - q31_t res00 = 0; - if (bias) - { - res00 = bias[rhs_rows - 1]; - } - - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q31_t rhs_value = rhs_ptr[0]; - q31_t lhs_value = lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * rhs_value; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows - 1], dst_shifts[rhs_rows - 1]); - - // Add offset - res00 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr += rhs_rows; - } - } -#else - for (int32_t rhs_rows_idx = 0; rhs_rows_idx <= (rhs_rows - 2); rhs_rows_idx += 2) - { - const q7_t *lhs_ptr = &lhs[0]; - q7_t *dst_ptr = &dst[0]; - - q31_t lhs_offset_contribution0 = 0; - q31_t lhs_offset_contribution1 = 0; - - for (int32_t x = 0; x < rhs_cols; ++x) - { - lhs_offset_contribution0 += rhs[x]; - lhs_offset_contribution1 += rhs[x + rhs_cols]; - } - - lhs_offset_contribution0 *= lhs_offset; - lhs_offset_contribution1 *= lhs_offset; - if (bias) - { - lhs_offset_contribution0 += bias[rhs_rows_idx]; - lhs_offset_contribution1 += bias[rhs_rows_idx + 1]; - } - - int32_t lhs_rows_idx = lhs_rows >> 1; - - while (lhs_rows_idx) - { - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = lhs_offset_contribution0; - q31_t res01 = lhs_offset_contribution1; - q31_t res10 = lhs_offset_contribution0; - q31_t res11 = lhs_offset_contribution1; - - for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) - { - q7_t rhs_value0 = rhs_ptr[0]; - q7_t rhs_value1 = rhs_ptr[rhs_cols]; - q7_t lhs_value = lhs_ptr[0]; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - lhs_value = lhs_ptr[rhs_cols]; - res10 += lhs_value * rhs_value0; - res11 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res01 = arm_nn_requantize(res01, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - res10 = arm_nn_requantize(res10, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res11 = arm_nn_requantize(res11, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - res10 += dst_offset; - res11 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - res10 = MAX(res10, activation_min); - res10 = MIN(res10, activation_max); - res11 = MAX(res11, activation_min); - res11 = MIN(res11, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr[1] = (q7_t)res01; - dst_ptr += rhs_rows; - dst_ptr[0] = (q7_t)res10; - dst_ptr[1] = (q7_t)res11; - dst_ptr += rhs_rows; - - lhs_ptr += rhs_cols; - - lhs_rows_idx--; - } - - // Left-over rows - if (lhs_rows % 2) - { - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = lhs_offset_contribution0; - q31_t res01 = lhs_offset_contribution1; - - for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) - { - q7_t rhs_value0 = rhs_ptr[0]; - q7_t rhs_value1 = rhs_ptr[rhs_cols]; - q7_t lhs_value = lhs_ptr[0]; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows_idx], dst_shifts[rhs_rows_idx]); - res01 = arm_nn_requantize(res01, dst_multipliers[rhs_rows_idx + 1], dst_shifts[rhs_rows_idx + 1]); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr[1] = (q7_t)res01; - } - - rhs += 2 * rhs_cols; - dst += 2; - } - - if (rhs_rows % 2) - { - const q7_t *lhs_ptr = &lhs[0]; - q7_t *dst_ptr = &dst[0]; - - for (int32_t lhs_rows_idx = 0; lhs_rows_idx < lhs_rows; ++lhs_rows_idx) - { - const q7_t *rhs_ptr = &rhs[0]; - q31_t res00 = 0; - if (bias) - { - res00 = bias[rhs_rows - 1]; - } - - for (int32_t rhs_cols_idx = rhs_cols; rhs_cols_idx != 0; rhs_cols_idx--) - { - q31_t rhs_value = rhs_ptr[0]; - q31_t lhs_value = lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * rhs_value; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multipliers[rhs_rows - 1], dst_shifts[rhs_rows - 1]); - - // Add offset - res00 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - - dst_ptr[0] = (q7_t)res00; - dst_ptr += rhs_rows; - } - } -#endif - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c deleted file mode 100644 index d6a45ef..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q15.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mult_q15.c - * Description: Q15 vector multiplication with variable output shifts - * - * $Date: 20. July 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/** - * @brief Q7 vector multiplication with variable output shifts - * @param[in] *pSrcA pointer to the first input vector - * @param[in] *pSrcB pointer to the second input vector - * @param[out] *pDst pointer to the output vector - * @param[in] out_shift amount of right-shift for output - * @param[in] blockSize number of samples in each vector - * - * Scaling and Overflow Behavior: - * \par - * The function uses saturating arithmetic. - * Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated. - */ - -void arm_nn_mult_q15(q15_t *pSrcA, q15_t *pSrcB, q15_t *pDst, const uint16_t out_shift, uint32_t blockSize) -{ - uint32_t blkCnt = blockSize; /* loop counters */ - - while (blkCnt > 0U) - { - /* C = A * B */ - /* Multiply the inputs and store the result in the destination buffer */ - *pDst++ = (q15_t)__SSAT(((q31_t)((q31_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 16); - - /* Decrement the blockSize loop counter */ - blkCnt--; - } -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c deleted file mode 100644 index fdced4c..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_mult_q7.c +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_mult_q7.c - * Description: Q7 vector multiplication with variable output shifts - * - * $Date: 20. July 2021 - * $Revision: V.1.1.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/** - * @brief Q7 vector multiplication with variable output shifts - * @param[in] *pSrcA pointer to the first input vector - * @param[in] *pSrcB pointer to the second input vector - * @param[out] *pDst pointer to the output vector - * @param[in] out_shift amount of right-shift for output - * @param[in] blockSize number of samples in each vector - * - * Scaling and Overflow Behavior: - * \par - * The function uses saturating arithmetic. - * Results outside of the allowable Q7 range [0x80 0x7F] will be saturated. - */ - -void arm_nn_mult_q7(q7_t *pSrcA, q7_t *pSrcB, q7_t *pDst, const uint16_t out_shift, uint32_t blockSize) -{ - uint32_t blkCnt = blockSize; /* loop counters */ - - while (blkCnt > 0U) - { - /* C = A * B */ - /* Multiply the inputs and store the result in the destination buffer */ - *pDst++ = (q7_t)__SSAT(((q15_t)((q15_t)(*pSrcA++) * (*pSrcB++) + NN_ROUND(out_shift)) >> out_shift), 8); - - /* Decrement the blockSize loop counter */ - blkCnt--; - } -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c deleted file mode 100644 index 5956d3a..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s16.c +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright (C) 2020-2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_vec_mat_mult_t_s16 - * Description: s16 vector by matrix (transposed) multiplication - * - * $Date: 04. January 2022 - * $Revision: V.1.2.0 - * - * Target Processor: Cortex-M - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s16 vector(lhs) by matrix (transposed) multiplication - * - * Refer header file for details. - * - */ -arm_status arm_nn_vec_mat_mult_t_s16(const q15_t *lhs, - const q7_t *rhs, - const q63_t *bias, - q15_t *dst, - const int32_t dst_multiplier, - const int32_t dst_shift, - const int32_t rhs_cols, - const int32_t rhs_rows, - const int32_t activation_min, - const int32_t activation_max) -{ -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - const int32_t row_loop_cnt = rhs_rows / 2; - - int32_t rhs_cols_fast = rhs_cols; - - if (rhs_cols > 512) - { - rhs_cols_fast = 512; - } - - for (int32_t i = 0; i < row_loop_cnt; i++) - { - q63_t acc_64_0 = 0; - q63_t acc_64_1 = 0; - int32_t acc_0 = 0; - int32_t acc_1 = 0; - - const int32_t col_loop_cnt = rhs_cols_fast / 4; - - const int16_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - const int8_t *rhs_1 = rhs + rhs_cols; - rhs += 2 * rhs_cols; - - for (int j = col_loop_cnt; j != 0; j--) - { - int32_t ker_0, ker_1, vec_part_0, vec_part_1; - vec_part_0 = arm_nn_read_q15x2_ia(&lhs_vec); - vec_part_1 = arm_nn_read_q15x2_ia(&lhs_vec); - - rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1); - - acc_0 = __SMLAD(ker_0, vec_part_0, acc_0); - acc_0 = __SMLAD(ker_1, vec_part_1, acc_0); - - rhs_1 = read_and_pad(rhs_1, &ker_0, &ker_1); - - acc_1 = __SMLAD(ker_0, vec_part_0, acc_1); - acc_1 = __SMLAD(ker_1, vec_part_1, acc_1); - } - - acc_64_0 += acc_0; - acc_64_1 += acc_1; - - for (int k = col_loop_cnt * 4; k < rhs_cols; k++) - { - const int32_t lhs_temp = (*lhs_vec); - lhs_vec++; - acc_64_0 += lhs_temp * (*rhs_0); - rhs_0++; - acc_64_1 += lhs_temp * (*rhs_1); - rhs_1++; - } - - if (bias) - { - acc_64_0 += *bias++; - acc_64_1 += *bias++; - } - q31_t tmp; - tmp = arm_nn_requantize_s64(acc_64_0, dst_multiplier, dst_shift); - tmp = MAX(tmp, activation_min); - tmp = MIN(tmp, activation_max); - *dst++ = (q15_t)tmp; - - tmp = arm_nn_requantize_s64(acc_64_1, dst_multiplier, dst_shift); - tmp = MAX(tmp, activation_min); - tmp = MIN(tmp, activation_max); - *dst++ = (q15_t)tmp; - } - - if (rhs_rows & 0x1) - { - q63_t acc_64_0 = 0; - int32_t acc_0 = 0; - const int32_t col_loop_cnt = rhs_cols_fast / 4; - - const int16_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - - for (int i = col_loop_cnt; i != 0; i--) - { - int32_t ker_0, ker_1, vec; - rhs_0 = read_and_pad(rhs_0, &ker_0, &ker_1); - - vec = arm_nn_read_q15x2_ia(&lhs_vec); - acc_0 = __SMLAD(ker_0, vec, acc_0); - - vec = arm_nn_read_q15x2_ia(&lhs_vec); - acc_0 = __SMLAD(ker_1, vec, acc_0); - } - - acc_64_0 += acc_0; - - for (int j = col_loop_cnt * 4; j < rhs_cols; j++) - { - const int32_t lhs_temp = (*lhs_vec); - lhs_vec++; - acc_64_0 += lhs_temp * (*rhs_0); - rhs_0++; - } - - if (bias) - { - acc_64_0 += *bias++; - } - q31_t tmp; - tmp = arm_nn_requantize_s64(acc_64_0, dst_multiplier, dst_shift); - tmp = MAX(tmp, activation_min); - tmp = MIN(tmp, activation_max); - *dst++ = (q15_t)tmp; - } - -#else - for (int i_row_loop_cnt = 0; i_row_loop_cnt < rhs_rows; i_row_loop_cnt++) - { - const q15_t *lhs_ptr = lhs; - const q7_t *rhs_ptr_0 = &rhs[0]; - - q63_t result = 0; - - if (bias) - { - result = *bias++; - } - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - const q63_t rhs_value0 = (int8_t)*rhs_ptr_0; - const q63_t lhs_value = *lhs_ptr; - - result += lhs_value * rhs_value0; - - ++rhs_ptr_0; - ++lhs_ptr; - } - - // Quantize down - result = arm_nn_requantize_s64(result, dst_multiplier, dst_shift); - - // Clamp the result - result = ((result) > (activation_min) ? (result) : (activation_min)); - result = ((result) < (activation_max) ? (result) : (activation_max)); - - *dst++ = (q15_t)result; - rhs += rhs_cols; - } -#endif - - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c deleted file mode 100644 index c7dfd14..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_s8.c +++ /dev/null @@ -1,402 +0,0 @@ -/* - * Copyright (C) 2020-2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_vec_mat_mult_t_s8 - * Description: s8 vector by matrix (transposed) multiplication - * - * $Date: 28 April 2022 - * $Revision: V.3.0.1 - * - * Target Processor: Cortex-M - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s8 vector(lhs) by matrix (transposed) multiplication - * - * Refer header file for details. - * - */ -arm_status arm_nn_vec_mat_mult_t_s8(const q7_t *lhs, - const q7_t *rhs, - const q31_t *bias, - q7_t *dst, - const int32_t lhs_offset, - const int32_t rhs_offset, - const int32_t dst_offset, - const int32_t dst_multiplier, - const int32_t dst_shift, - const int32_t rhs_cols, - const int32_t rhs_rows, - const int32_t activation_min, - const int32_t activation_max, - const int32_t address_offset) -{ - (void)rhs_offset; -#if defined(ARM_MATH_MVEI) - const int32_t row_loop_cnt = rhs_rows / 3; - const uint32x4_t address_offset_array = {0, address_offset, address_offset * 2, address_offset * 3}; - - for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) - { - int32_t acc_0 = 0; - int32_t acc_1 = 0; - int32_t acc_2 = 0; - - const int32_t col_loop_cnt = (rhs_cols + 15) / 16; - - const int8_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - const int8_t *rhs_1 = rhs + rhs_cols; - const int8_t *rhs_2 = rhs + 2 * rhs_cols; - - int32_t rhs_sum_0 = 0; - int32_t rhs_sum_1 = 0; - int32_t rhs_sum_2 = 0; - - uint32_t col_cnt = (uint32_t)rhs_cols; - - for (int i = 0; i < col_loop_cnt; i++) - { - mve_pred16_t p = vctp8q(col_cnt); - col_cnt -= 16; - - const int8x16_t input = vldrbq_z_s8(lhs_vec, p); - - const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p); - rhs_sum_0 = vaddvaq_p_s8(rhs_sum_0, ker_0, p); - acc_0 = vmladavaq_p_s8(acc_0, ker_0, input, p); - - const int8x16_t ker_1 = vldrbq_z_s8(rhs_1, p); - rhs_sum_1 = vaddvaq_p_s8(rhs_sum_1, ker_1, p); - acc_1 = vmladavaq_p_s8(acc_1, ker_1, input, p); - - const int8x16_t ker_2 = vldrbq_z_s8(rhs_2, p); - rhs_sum_2 = vaddvaq_p_s8(rhs_sum_2, ker_2, p); - acc_2 = vmladavaq_p_s8(acc_2, ker_2, input, p); - - lhs_vec += 16; - rhs_0 += 16; - rhs_1 += 16; - rhs_2 += 16; - } - rhs += 3 * rhs_cols; - - int32x4_t acc = {acc_0, acc_1, acc_2, 0}; - mve_pred16_t p = vctp32q(3); - if (bias) - { - int32x4_t b = vldrwq_z_s32(bias, p); - acc = vaddq_m_s32(vuninitializedq_s32(), acc, b, p); - bias += 3; - } - const int32x4_t rhs_sum = {rhs_sum_0, rhs_sum_1, rhs_sum_2, 0}; - acc += vdupq_n_s32(lhs_offset) * rhs_sum; - - acc = arm_requantize_mve(acc, dst_multiplier, dst_shift); - acc = vaddq_s32(acc, vdupq_n_s32(dst_offset)); - acc = vmaxq_s32(acc, vdupq_n_s32(activation_min)); - acc = vminq_s32(acc, vdupq_n_s32(activation_max)); - - if (address_offset > 1L) - { - vstrbq_scatter_offset_s32(dst, address_offset_array, acc); - } - else - { - vstrbq_p_s32(dst, acc, p); - } - dst += 3 * address_offset; - } - - const int loop_cnt = rhs_rows % 3; - for (int i_row_loop_cnt = 0; i_row_loop_cnt < loop_cnt; i_row_loop_cnt++) - { - int32_t acc_0 = 0; - const int32_t col_loop_cnt = (rhs_cols + 15) / 16; - const int8_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - int32_t rhs_sum_0 = 0; - uint32_t col_cnt = (uint32_t)rhs_cols; - - for (int i = 0; i < col_loop_cnt; i++) - { - mve_pred16_t p = vctp8q(col_cnt); - col_cnt -= 16; - const int8x16_t input = vldrbq_z_s8(lhs_vec, p); - - const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p); - rhs_sum_0 = vaddvaq_p_s8(rhs_sum_0, ker_0, p); - acc_0 = vmladavaq_p_s8(acc_0, ker_0, input, p); - - lhs_vec += 16; - rhs_0 += 16; - } - rhs += rhs_cols; - - if (bias) - { - acc_0 += *bias; - bias++; - } - const int32_t offsets = rhs_sum_0 * lhs_offset; - acc_0 += offsets; - acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift); - acc_0 += dst_offset; - - // Clamp the result - acc_0 = MAX(acc_0, activation_min); - *dst = MIN(acc_0, activation_max); - dst += address_offset; - } - -#elif defined(ARM_MATH_DSP) - const int32_t row_loop_cnt = rhs_rows / 2; - const int16_t lhs_offset_s16 = (int16_t)lhs_offset; - const uint32_t lhs_offset_s16x2 = __PKHBT(lhs_offset_s16, lhs_offset_s16, 16); - - for (int32_t i = 0; i < row_loop_cnt; i++) - { - int32_t acc_0 = 0; - int32_t acc_1 = 0; - if (bias) - { - acc_0 = *bias++; - acc_1 = *bias++; - } - - const int32_t col_loop_cnt = rhs_cols / 4; - - const int8_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - const int8_t *rhs_1 = rhs + rhs_cols; - rhs += 2 * rhs_cols; - - for (int j = col_loop_cnt; j != 0; j--) - { - int32_t vec_0 = arm_nn_read_q7x4_ia(&lhs_vec); - int32_t vec_1 = __SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8); - - vec_0 = __SXTAB16(lhs_offset_s16x2, vec_0); - - int32_t ker_0 = arm_nn_read_q7x4_ia(&rhs_0); - int32_t ker_1 = __SXTB16_RORn((uint32_t)ker_0, 8); - ker_0 = __SXTB16(ker_0); - - acc_0 = __SMLAD(ker_1, vec_1, acc_0); - acc_0 = __SMLAD(ker_0, vec_0, acc_0); - - ker_0 = arm_nn_read_q7x4_ia(&rhs_1); - ker_1 = __SXTB16_RORn((uint32_t)ker_0, 8); - ker_0 = __SXTB16(ker_0); - - acc_1 = __SMLAD(ker_1, vec_1, acc_1); - acc_1 = __SMLAD(ker_0, vec_0, acc_1); - } - - for (int k = col_loop_cnt * 4; k < rhs_cols; k++) - { - const int32_t lhs_temp = (*lhs_vec + lhs_offset); - lhs_vec++; - acc_0 += lhs_temp * (*rhs_0); - rhs_0++; - acc_1 += lhs_temp * (*rhs_1); - rhs_1++; - } - - acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift); - acc_1 = arm_nn_requantize(acc_1, dst_multiplier, dst_shift); - - // Add offset - acc_0 += dst_offset; - acc_1 += dst_offset; - // Clamp the result - acc_0 = MAX(acc_0, activation_min); - acc_0 = MIN(acc_0, activation_max); - acc_1 = MAX(acc_1, activation_min); - acc_1 = MIN(acc_1, activation_max); - *dst = (int8_t)acc_0; - *(dst + address_offset) = (int8_t)acc_1; - dst += 2 * address_offset; - } - - if (rhs_rows & 0x1) - { - int32_t acc_0 = 0; - if (bias) - { - acc_0 = *bias++; - } - const int32_t col_loop_cnt = rhs_cols / 4; - - const int8_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - - for (int i = col_loop_cnt; i != 0; i--) - { - int32_t vec_0 = arm_nn_read_q7x4_ia(&lhs_vec); - int32_t vec_1 = __SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8); - vec_0 = __SXTAB16(lhs_offset_s16x2, vec_0); - - int32_t ker_0 = arm_nn_read_q7x4_ia(&rhs_0); - int32_t ker_1 = __SXTB16_RORn((uint32_t)ker_0, 8); - ker_0 = __SXTB16(ker_0); - - acc_0 = __SMLAD(ker_1, vec_1, acc_0); - acc_0 = __SMLAD(ker_0, vec_0, acc_0); - } - - for (int j = col_loop_cnt * 4; j < rhs_cols; j++) - { - const int32_t lhs_temp = (*lhs_vec + lhs_offset); - lhs_vec++; - acc_0 += lhs_temp * (*rhs_0); - rhs_0++; - } - - acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift); - - // Add offset - acc_0 += dst_offset; - // Clamp the result - acc_0 = MAX(acc_0, activation_min); - acc_0 = MIN(acc_0, activation_max); - *dst = (int8_t)acc_0; - dst += address_offset; - } - -#else - - const int32_t row_loop_cnt = rhs_rows / 3; - - for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) - { - const q7_t *lhs_ptr = lhs; - const q7_t *rhs_ptr_0 = &rhs[0]; - const q7_t *rhs_ptr_1 = &rhs[rhs_cols]; - const q7_t *rhs_ptr_2 = &rhs[rhs_cols * 2]; - - q31_t res00 = 0; - q31_t res01 = 0; - q31_t res02 = 0; - if (bias) - { - res00 = *bias++; - res01 = *bias++; - res02 = *bias++; - } - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - const q31_t rhs_value0 = (int8_t)*rhs_ptr_0; - const q31_t rhs_value1 = (int8_t)*rhs_ptr_1; - const q31_t rhs_value2 = (int8_t)*rhs_ptr_2; - const q31_t lhs_value = (int8_t)*lhs_ptr + lhs_offset; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - res02 += lhs_value * rhs_value2; - - ++rhs_ptr_0; - ++rhs_ptr_1; - ++rhs_ptr_2; - ++lhs_ptr; - } - // Quantize down - res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift); - res01 = arm_nn_requantize(res01, dst_multiplier, dst_shift); - res02 = arm_nn_requantize(res02, dst_multiplier, dst_shift); - - // Add offset - res00 += dst_offset; - res01 += dst_offset; - res02 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - res02 = MAX(res02, activation_min); - res02 = MIN(res02, activation_max); - - *dst = (q7_t)res00; - *(dst + address_offset) = (q7_t)res01; - *(dst + 2 * address_offset) = (q7_t)res02; - dst += 3 * address_offset; - - rhs += 3 * rhs_cols; - } - - const int loop_cnt = rhs_rows % 3; - - for (int i_loop_cnt = 0; i_loop_cnt < loop_cnt; i_loop_cnt++) - { - const q7_t *lhs_ptr = &lhs[0]; - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = 0; - if (bias) - { - res00 = *bias++; - } - - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q31_t rhs_value0 = (int8_t)rhs_ptr[0]; - q31_t lhs_value = (int8_t)lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * rhs_value0; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift); - - // Add offset - res00 += dst_offset; - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - - *dst = (int8_t)res00; - dst += address_offset; - rhs += rhs_cols; - } -#endif - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c deleted file mode 100644 index 5b821c3..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nn_vec_mat_mult_t_svdf_s8.c +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Copyright (C) 2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_vec_mat_mult_t_svdf_s8 - * Description: s8 vector by matrix (transposed) multiplication with - * s16 output. Targetted at SVDF operator. - * - * $Date: 15. April 2021 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup NNBasicMath - * @{ - */ - -/* - * s8 vector(lhs) by matrix (transposed) multiplication - * - * Refer header file for details. - * - */ -arm_status arm_nn_vec_mat_mult_t_svdf_s8(const q7_t *lhs, - const q7_t *rhs, - q15_t *dst, - const int32_t lhs_offset, - const int32_t rhs_offset, - const int32_t dst_offset, - const int32_t dst_multiplier, - const int32_t dst_shift, - const int32_t rhs_cols, - const int32_t rhs_rows, - const int32_t activation_min, - const int32_t activation_max) -{ - (void)rhs_offset; - if (rhs_cols < 0 || (NN_Q31_MAX - rhs_cols) < 16 || dst_offset < 0) - { - return ARM_MATH_ARGUMENT_ERROR; - } - - (void)rhs_offset; -#if defined(ARM_MATH_MVEI) - int32_t row_loop_cnt = rhs_rows / 3; - - for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) - { - int32_t acc_0 = 0; - int32_t acc_1 = 0; - int32_t acc_2 = 0; - - const int32_t col_loop_cnt = (rhs_cols + 15) / 16; - - const int8_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - const int8_t *rhs_1 = rhs + rhs_cols; - const int8_t *rhs_2 = rhs + 2 * rhs_cols; - - int32_t rhs_sum_0 = 0; - int32_t rhs_sum_1 = 0; - int32_t rhs_sum_2 = 0; - - uint32_t col_cnt = (uint32_t)rhs_cols; - - for (int i = 0; i < col_loop_cnt; i++) - { - mve_pred16_t p = vctp8q(col_cnt); - col_cnt -= 16; - - const int8x16_t input = vldrbq_z_s8(lhs_vec, p); - - const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p); - rhs_sum_0 = vaddvaq_p_s8(rhs_sum_0, ker_0, p); - acc_0 = vmladavaq_p_s8(acc_0, ker_0, input, p); - - const int8x16_t ker_1 = vldrbq_z_s8(rhs_1, p); - rhs_sum_1 = vaddvaq_p_s8(rhs_sum_1, ker_1, p); - acc_1 = vmladavaq_p_s8(acc_1, ker_1, input, p); - - const int8x16_t ker_2 = vldrbq_z_s8(rhs_2, p); - rhs_sum_2 = vaddvaq_p_s8(rhs_sum_2, ker_2, p); - acc_2 = vmladavaq_p_s8(acc_2, ker_2, input, p); - - lhs_vec += 16; - rhs_0 += 16; - rhs_1 += 16; - rhs_2 += 16; - } - rhs += 3 * rhs_cols; - - int32x4_t acc = {acc_0, acc_1, acc_2, 0}; - const int32x4_t rhs_sum = {rhs_sum_0, rhs_sum_1, rhs_sum_2, 0}; - acc += vdupq_n_s32(lhs_offset) * rhs_sum; - - acc = arm_requantize_mve(acc, dst_multiplier, dst_shift); - acc = vmaxq_s32(acc, vdupq_n_s32(activation_min)); - acc = vminq_s32(acc, vdupq_n_s32(activation_max)); - *(dst) = (int16_t)acc[0]; - *(dst + dst_offset) = (int16_t)acc[1]; - *(dst + 2 * dst_offset) = (int16_t)acc[2]; - dst += 3 * dst_offset; - } - - const int loop_cnt = rhs_rows % 3; - for (int i_row_loop_cnt = 0; i_row_loop_cnt < loop_cnt; i_row_loop_cnt++) - { - int32_t acc_0 = 0; - const int32_t col_loop_cnt = (rhs_cols + 15) / 16; - const int8_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - int32_t rhs_sum_0 = 0; - uint32_t col_cnt = (uint32_t)rhs_cols; - - for (int i = 0; i < col_loop_cnt; i++) - { - mve_pred16_t p = vctp8q(col_cnt); - col_cnt -= 16; - const int8x16_t input = vldrbq_z_s8(lhs_vec, p); - - const int8x16_t ker_0 = vldrbq_z_s8(rhs_0, p); - rhs_sum_0 = vaddvaq_p_s8(rhs_sum_0, ker_0, p); - acc_0 = vmladavaq_p_s8(acc_0, ker_0, input, p); - - lhs_vec += 16; - rhs_0 += 16; - } - rhs += rhs_cols; - - const int32_t offsets = rhs_sum_0 * lhs_offset; - acc_0 = __QADD(acc_0, offsets); - acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift); - - // Clamp the result - acc_0 = MAX(acc_0, activation_min); - *dst = (q15_t)MIN(acc_0, activation_max); - dst += dst_offset; - } - -#elif defined(ARM_MATH_DSP) - int32_t row_loop_cnt = rhs_rows / 2; - - const int16_t lhs_offset_s16 = lhs_offset; - const int16_t rhs_offset_s16 = rhs_offset; - - const uint32_t lhs_offset_s16x2 = __PKHBT(lhs_offset_s16, lhs_offset_s16, 16); - const uint32_t rhs_offset_s16x2 = __PKHBT(rhs_offset_s16, rhs_offset_s16, 16); - for (int32_t i = 0; i < row_loop_cnt; i++) - { - int32_t acc_0 = 0; - int32_t acc_1 = 0; - - const int32_t col_loop_cnt = rhs_cols / 4; - const int8_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - const int8_t *rhs_1 = rhs + rhs_cols; - rhs += 2 * rhs_cols; - for (int j = col_loop_cnt; j != 0; j--) - { - int32_t vec_0 = arm_nn_read_q7x4_ia(&lhs_vec); - int32_t vec_1 = __SXTAB16_RORn(lhs_offset_s16x2, (uint32_t)vec_0, 8); - vec_0 = __SXTAB16(lhs_offset_s16x2, vec_0); - int32_t ker_0 = arm_nn_read_q7x4_ia(&rhs_0); - int32_t ker_1 = __SXTAB16_RORn(rhs_offset_s16x2, (uint32_t)ker_0, 8); - ker_0 = __SXTAB16(rhs_offset_s16x2, ker_0); - acc_0 = __SMLAD(ker_1, vec_1, acc_0); - acc_0 = __SMLAD(ker_0, vec_0, acc_0); - ker_0 = arm_nn_read_q7x4_ia(&rhs_1); - ker_1 = __SXTAB16_RORn(rhs_offset_s16x2, (uint32_t)ker_0, 8); - ker_0 = __SXTAB16(rhs_offset_s16x2, ker_0); - acc_1 = __SMLAD(ker_1, vec_1, acc_1); - acc_1 = __SMLAD(ker_0, vec_0, acc_1); - } - for (int k = col_loop_cnt * 4; k < rhs_cols; k++) - { - const int32_t lhs_temp = (*lhs_vec + lhs_offset); - lhs_vec++; - acc_0 += lhs_temp * (*rhs_0 + rhs_offset); - rhs_0++; - acc_1 += lhs_temp * (*rhs_1 + rhs_offset); - rhs_1++; - } - acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift); - acc_1 = arm_nn_requantize(acc_1, dst_multiplier, dst_shift); - - // Clamp the result - acc_0 = MAX(acc_0, activation_min); - acc_0 = MIN(acc_0, activation_max); - acc_1 = MAX(acc_1, activation_min); - acc_1 = MIN(acc_1, activation_max); - *dst = (q15_t)acc_0; - *(dst + dst_offset) = (q15_t)acc_1; - dst += 2 * dst_offset; - } - if (rhs_rows & 0x1) - { - int32_t acc_0 = 0; - const int32_t col_loop_cnt = rhs_cols / 4; - const int8_t *lhs_vec = lhs; - const int8_t *rhs_0 = rhs; - for (int i = col_loop_cnt; i != 0; i--) - { - int32_t vec_0 = arm_nn_read_q7x4_ia(&lhs_vec); - int32_t vec_1 = __SXTAB16(lhs_offset_s16x2, __ROR((uint32_t)vec_0, 8)); - vec_0 = __SXTAB16(lhs_offset_s16x2, vec_0); - int32_t ker_0 = arm_nn_read_q7x4_ia(&rhs_0); - int32_t ker_1 = __SXTAB16(rhs_offset_s16x2, __ROR((uint32_t)ker_0, 8)); - ker_0 = __SXTAB16(rhs_offset_s16x2, ker_0); - acc_0 = __SMLAD(ker_1, vec_1, acc_0); - acc_0 = __SMLAD(ker_0, vec_0, acc_0); - } - for (int j = col_loop_cnt * 4; j < rhs_cols; j++) - { - const int32_t lhs_temp = (*lhs_vec + lhs_offset); - lhs_vec++; - acc_0 += lhs_temp * (*rhs_0 + rhs_offset); - rhs_0++; - } - acc_0 = arm_nn_requantize(acc_0, dst_multiplier, dst_shift); - - // Clamp the result - acc_0 = MAX(acc_0, activation_min); - acc_0 = MIN(acc_0, activation_max); - *dst = (q15_t)acc_0; - dst += dst_offset; - } - -#else - - int32_t row_loop_cnt = rhs_rows / 3; - - for (int i_row_loop_cnt = 0; i_row_loop_cnt < row_loop_cnt; i_row_loop_cnt++) - { - const q7_t *lhs_ptr = lhs; - const q7_t *rhs_ptr_0 = &rhs[0]; - const q7_t *rhs_ptr_1 = &rhs[rhs_cols]; - const q7_t *rhs_ptr_2 = &rhs[rhs_cols * 2]; - - q31_t res00 = 0; - q31_t res01 = 0; - q31_t res02 = 0; - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - const q31_t rhs_value0 = (int8_t)*rhs_ptr_0; - const q31_t rhs_value1 = (int8_t)*rhs_ptr_1; - const q31_t rhs_value2 = (int8_t)*rhs_ptr_2; - const q31_t lhs_value = (int8_t)*lhs_ptr + lhs_offset; - - res00 += lhs_value * rhs_value0; - res01 += lhs_value * rhs_value1; - res02 += lhs_value * rhs_value2; - - ++rhs_ptr_0; - ++rhs_ptr_1; - ++rhs_ptr_2; - ++lhs_ptr; - } - // Quantize down - res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift); - res01 = arm_nn_requantize(res01, dst_multiplier, dst_shift); - res02 = arm_nn_requantize(res02, dst_multiplier, dst_shift); - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - res01 = MAX(res01, activation_min); - res01 = MIN(res01, activation_max); - res02 = MAX(res02, activation_min); - res02 = MIN(res02, activation_max); - - *dst = (q15_t)res00; - *(dst + dst_offset) = (q15_t)res01; - *(dst + 2 * dst_offset) = (q15_t)res02; - dst += 3 * dst_offset; - rhs += 3 * rhs_cols; - } - - const int loop_cnt = rhs_rows % 3; - - for (int i_loop_cnt = 0; i_loop_cnt < loop_cnt; i_loop_cnt++) - { - const q7_t *lhs_ptr = &lhs[0]; - const q7_t *rhs_ptr = &rhs[0]; - - q31_t res00 = 0; - - for (int32_t rhs_cols_idx = 0; rhs_cols_idx < rhs_cols; ++rhs_cols_idx) - { - q31_t rhs_value0 = (int8_t)rhs_ptr[0] + rhs_offset; - q31_t lhs_value = (int8_t)lhs_ptr[0] + lhs_offset; - - res00 += lhs_value * rhs_value0; - - ++rhs_ptr; - ++lhs_ptr; - } - - // Quantize down - res00 = arm_nn_requantize(res00, dst_multiplier, dst_shift); - - // Clamp the result - res00 = MAX(res00, activation_min); - res00 = MIN(res00, activation_max); - - *dst = (q15_t)res00; - dst += dst_offset; - rhs += rhs_cols; - } -#endif - - return ARM_MATH_SUCCESS; -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c deleted file mode 100644 index 5a8cea2..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_nntables.c +++ /dev/null @@ -1,203 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nntables.c - * Description: Converts the elements of the Q7 vector to Q15 vector without left-shift - * - * $Date: 17. January 2018 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @brief tables for various activation functions - * - * This file include the declaration of common tables. - * Most of them are used for activation functions - * - * Assumption: - * Unified table: input is 3.x format, i.e, range of [-8, 8) - * sigmoid(8) = 0.9996646498695336 - * tanh(8) = 0.9999997749296758 - * The accuracy here should be good enough - * - * 2-stage HL table: - * - * The entire input range is divided into two parts: - * - * Low range table: 0x000x xxxx or 0x111x xxxx - * table entry will be the binary number excluding the first - * two digits, i.e., 0x0x xxxx or 0x1x xxxx - * - * - * - * High range table 0x0010 0000 -- 0x0111 1111 - * 0x1000 0000 -- 0x1101 1111 - * - * For positive numbers, table entry will be - * 0x0010 0000 -- 0x0111 1111 minus 0x0010 0000 - * i.e., 0x0000 0000 - 0x0101 11111 - * - * same thing for the negative numbers, table entry will be - * 0x1000 0000 -- 0x1101 1111 minux 0x0010 0000 - * i.e., 0x0110 0000 - 0x1011 1111 - */ - -const q7_t sigmoidTable_q7[256] = { - 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x53, 0x55, 0x57, 0x59, 0x5a, 0x5c, 0x5e, 0x5f, 0x61, - 0x62, 0x63, 0x65, 0x66, 0x67, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x72, 0x73, 0x74, 0x74, - 0x75, 0x76, 0x76, 0x77, 0x77, 0x78, 0x78, 0x79, 0x79, 0x7a, 0x7a, 0x7a, 0x7b, 0x7b, 0x7b, 0x7c, 0x7c, 0x7c, 0x7c, - 0x7c, 0x7d, 0x7d, 0x7d, 0x7d, 0x7d, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x01, - 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x02, 0x02, 0x02, 0x02, - 0x02, 0x02, 0x02, 0x02, 0x03, 0x03, 0x03, 0x03, 0x03, 0x04, 0x04, 0x04, 0x04, 0x04, 0x05, 0x05, 0x05, 0x06, 0x06, - 0x06, 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0c, 0x0c, 0x0d, 0x0e, 0x0e, 0x0f, 0x10, 0x11, 0x12, - 0x13, 0x14, 0x15, 0x16, 0x17, 0x19, 0x1a, 0x1b, 0x1d, 0x1e, 0x1f, 0x21, 0x22, 0x24, 0x26, 0x27, 0x29, 0x2b, 0x2d, - 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, -}; - -const q15_t sigmoidTable_q15[256] = { - 0x4000, 0x4200, 0x43ff, 0x45fc, 0x47f5, 0x49eb, 0x4bdc, 0x4dc8, 0x4fad, 0x518a, 0x5360, 0x552c, 0x56ef, 0x58a8, - 0x5a57, 0x5bfb, 0x5d93, 0x5f20, 0x60a1, 0x6216, 0x637f, 0x64db, 0x662b, 0x676f, 0x68a6, 0x69d2, 0x6af1, 0x6c05, - 0x6d0d, 0x6e09, 0x6efb, 0x6fe2, 0x70be, 0x7190, 0x7258, 0x7316, 0x73cc, 0x7478, 0x751b, 0x75b7, 0x764a, 0x76d6, - 0x775b, 0x77d8, 0x784f, 0x78c0, 0x792a, 0x798f, 0x79ee, 0x7a48, 0x7a9d, 0x7aed, 0x7b39, 0x7b80, 0x7bc4, 0x7c03, - 0x7c3f, 0x7c78, 0x7cad, 0x7ce0, 0x7d0f, 0x7d3c, 0x7d66, 0x7d8d, 0x7db3, 0x7dd6, 0x7df7, 0x7e16, 0x7e33, 0x7e4f, - 0x7e69, 0x7e81, 0x7e98, 0x7eae, 0x7ec2, 0x7ed5, 0x7ee7, 0x7ef8, 0x7f08, 0x7f17, 0x7f25, 0x7f32, 0x7f3e, 0x7f4a, - 0x7f55, 0x7f5f, 0x7f69, 0x7f72, 0x7f7b, 0x7f83, 0x7f8a, 0x7f91, 0x7f98, 0x7f9e, 0x7fa4, 0x7faa, 0x7faf, 0x7fb4, - 0x7fb8, 0x7fbd, 0x7fc1, 0x7fc5, 0x7fc8, 0x7fcc, 0x7fcf, 0x7fd2, 0x7fd5, 0x7fd7, 0x7fda, 0x7fdc, 0x7fde, 0x7fe0, - 0x7fe2, 0x7fe4, 0x7fe6, 0x7fe7, 0x7fe9, 0x7fea, 0x7feb, 0x7fed, 0x7fee, 0x7fef, 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, - 0x7ff4, 0x7ff4, 0x000b, 0x000c, 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0015, 0x0016, - 0x0017, 0x0019, 0x001a, 0x001c, 0x001e, 0x0020, 0x0022, 0x0024, 0x0026, 0x0029, 0x002b, 0x002e, 0x0031, 0x0034, - 0x0038, 0x003b, 0x003f, 0x0043, 0x0048, 0x004c, 0x0051, 0x0056, 0x005c, 0x0062, 0x0068, 0x006f, 0x0076, 0x007d, - 0x0085, 0x008e, 0x0097, 0x00a1, 0x00ab, 0x00b6, 0x00c2, 0x00ce, 0x00db, 0x00e9, 0x00f8, 0x0108, 0x0119, 0x012b, - 0x013e, 0x0152, 0x0168, 0x017f, 0x0197, 0x01b1, 0x01cd, 0x01ea, 0x0209, 0x022a, 0x024d, 0x0273, 0x029a, 0x02c4, - 0x02f1, 0x0320, 0x0353, 0x0388, 0x03c1, 0x03fd, 0x043c, 0x0480, 0x04c7, 0x0513, 0x0563, 0x05b8, 0x0612, 0x0671, - 0x06d6, 0x0740, 0x07b1, 0x0828, 0x08a5, 0x092a, 0x09b6, 0x0a49, 0x0ae5, 0x0b88, 0x0c34, 0x0cea, 0x0da8, 0x0e70, - 0x0f42, 0x101e, 0x1105, 0x11f7, 0x12f3, 0x13fb, 0x150f, 0x162e, 0x175a, 0x1891, 0x19d5, 0x1b25, 0x1c81, 0x1dea, - 0x1f5f, 0x20e0, 0x226d, 0x2405, 0x25a9, 0x2758, 0x2911, 0x2ad4, 0x2ca0, 0x2e76, 0x3053, 0x3238, 0x3424, 0x3615, - 0x380b, 0x3a04, 0x3c01, 0x3e00, -}; - -const q15_t sigmoidLTable_q15[128] = { - 0x4000, 0x4100, 0x4200, 0x42ff, 0x43ff, 0x44fd, 0x45fc, 0x46f9, 0x47f5, 0x48f1, 0x49eb, 0x4ae5, 0x4bdc, - 0x4cd3, 0x4dc8, 0x4ebb, 0x4fad, 0x509c, 0x518a, 0x5276, 0x5360, 0x5447, 0x552c, 0x560f, 0x56ef, 0x57cd, - 0x58a8, 0x5981, 0x5a57, 0x5b2a, 0x5bfb, 0x5cc9, 0x5d93, 0x5e5b, 0x5f20, 0x5fe2, 0x60a1, 0x615d, 0x6216, - 0x62cc, 0x637f, 0x642e, 0x64db, 0x6584, 0x662b, 0x66ce, 0x676f, 0x680c, 0x68a6, 0x693d, 0x69d2, 0x6a63, - 0x6af1, 0x6b7c, 0x6c05, 0x6c8a, 0x6d0d, 0x6d8d, 0x6e09, 0x6e84, 0x6efb, 0x6f70, 0x6fe2, 0x7051, 0x0f42, - 0x0faf, 0x101e, 0x1090, 0x1105, 0x117c, 0x11f7, 0x1273, 0x12f3, 0x1376, 0x13fb, 0x1484, 0x150f, 0x159d, - 0x162e, 0x16c3, 0x175a, 0x17f4, 0x1891, 0x1932, 0x19d5, 0x1a7c, 0x1b25, 0x1bd2, 0x1c81, 0x1d34, 0x1dea, - 0x1ea3, 0x1f5f, 0x201e, 0x20e0, 0x21a5, 0x226d, 0x2337, 0x2405, 0x24d6, 0x25a9, 0x267f, 0x2758, 0x2833, - 0x2911, 0x29f1, 0x2ad4, 0x2bb9, 0x2ca0, 0x2d8a, 0x2e76, 0x2f64, 0x3053, 0x3145, 0x3238, 0x332d, 0x3424, - 0x351b, 0x3615, 0x370f, 0x380b, 0x3907, 0x3a04, 0x3b03, 0x3c01, 0x3d01, 0x3e00, 0x3f00, -}; - -const q15_t sigmoidHTable_q15[192] = { - 0x70be, 0x7190, 0x7258, 0x7316, 0x73cc, 0x7478, 0x751b, 0x75b7, 0x764a, 0x76d6, 0x775b, 0x77d8, 0x784f, 0x78c0, - 0x792a, 0x798f, 0x79ee, 0x7a48, 0x7a9d, 0x7aed, 0x7b39, 0x7b80, 0x7bc4, 0x7c03, 0x7c3f, 0x7c78, 0x7cad, 0x7ce0, - 0x7d0f, 0x7d3c, 0x7d66, 0x7d8d, 0x7db3, 0x7dd6, 0x7df7, 0x7e16, 0x7e33, 0x7e4f, 0x7e69, 0x7e81, 0x7e98, 0x7eae, - 0x7ec2, 0x7ed5, 0x7ee7, 0x7ef8, 0x7f08, 0x7f17, 0x7f25, 0x7f32, 0x7f3e, 0x7f4a, 0x7f55, 0x7f5f, 0x7f69, 0x7f72, - 0x7f7b, 0x7f83, 0x7f8a, 0x7f91, 0x7f98, 0x7f9e, 0x7fa4, 0x7faa, 0x7faf, 0x7fb4, 0x7fb8, 0x7fbd, 0x7fc1, 0x7fc5, - 0x7fc8, 0x7fcc, 0x7fcf, 0x7fd2, 0x7fd5, 0x7fd7, 0x7fda, 0x7fdc, 0x7fde, 0x7fe0, 0x7fe2, 0x7fe4, 0x7fe6, 0x7fe7, - 0x7fe9, 0x7fea, 0x7feb, 0x7fed, 0x7fee, 0x7fef, 0x7ff0, 0x7ff1, 0x7ff2, 0x7ff3, 0x7ff4, 0x7ff4, 0x000b, 0x000c, - 0x000c, 0x000d, 0x000e, 0x000f, 0x0010, 0x0011, 0x0012, 0x0013, 0x0015, 0x0016, 0x0017, 0x0019, 0x001a, 0x001c, - 0x001e, 0x0020, 0x0022, 0x0024, 0x0026, 0x0029, 0x002b, 0x002e, 0x0031, 0x0034, 0x0038, 0x003b, 0x003f, 0x0043, - 0x0048, 0x004c, 0x0051, 0x0056, 0x005c, 0x0062, 0x0068, 0x006f, 0x0076, 0x007d, 0x0085, 0x008e, 0x0097, 0x00a1, - 0x00ab, 0x00b6, 0x00c2, 0x00ce, 0x00db, 0x00e9, 0x00f8, 0x0108, 0x0119, 0x012b, 0x013e, 0x0152, 0x0168, 0x017f, - 0x0197, 0x01b1, 0x01cd, 0x01ea, 0x0209, 0x022a, 0x024d, 0x0273, 0x029a, 0x02c4, 0x02f1, 0x0320, 0x0353, 0x0388, - 0x03c1, 0x03fd, 0x043c, 0x0480, 0x04c7, 0x0513, 0x0563, 0x05b8, 0x0612, 0x0671, 0x06d6, 0x0740, 0x07b1, 0x0828, - 0x08a5, 0x092a, 0x09b6, 0x0a49, 0x0ae5, 0x0b88, 0x0c34, 0x0cea, 0x0da8, 0x0e70, -}; - -const q7_t tanhTable_q7[256] = { - 0x00, 0x08, 0x10, 0x18, 0x1f, 0x27, 0x2e, 0x35, 0x3b, 0x41, 0x47, 0x4c, 0x51, 0x56, 0x5a, 0x5e, 0x61, 0x65, 0x68, - 0x6a, 0x6d, 0x6f, 0x71, 0x72, 0x74, 0x75, 0x76, 0x78, 0x78, 0x79, 0x7a, 0x7b, 0x7b, 0x7c, 0x7c, 0x7d, 0x7d, 0x7e, - 0x7e, 0x7e, 0x7e, 0x7e, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x81, - 0x81, 0x81, 0x81, 0x81, 0x81, 0x81, 0x82, 0x82, 0x82, 0x82, 0x82, 0x83, 0x83, 0x84, 0x84, 0x85, 0x85, 0x86, 0x87, - 0x88, 0x88, 0x8a, 0x8b, 0x8c, 0x8e, 0x8f, 0x91, 0x93, 0x96, 0x98, 0x9b, 0x9f, 0xa2, 0xa6, 0xaa, 0xaf, 0xb4, 0xb9, - 0xbf, 0xc5, 0xcb, 0xd2, 0xd9, 0xe1, 0xe8, 0xf0, 0xf8, -}; - -const q15_t tanhTable_q15[256] = { - 0x0000, 0x07fd, 0x0feb, 0x17b9, 0x1f59, 0x26bf, 0x2ddf, 0x34ae, 0x3b27, 0x4142, 0x46fd, 0x4c56, 0x514d, 0x55e2, - 0x5a1a, 0x5df6, 0x617c, 0x64b0, 0x6797, 0x6a37, 0x6c95, 0x6eb5, 0x709e, 0x7254, 0x73dc, 0x753a, 0x7672, 0x7788, - 0x787f, 0x795b, 0x7a1e, 0x7acb, 0x7b65, 0x7bee, 0x7c66, 0x7cd1, 0x7d30, 0x7d84, 0x7dce, 0x7e0f, 0x7e49, 0x7e7d, - 0x7eaa, 0x7ed2, 0x7ef5, 0x7f14, 0x7f30, 0x7f48, 0x7f5e, 0x7f71, 0x7f82, 0x7f91, 0x7f9e, 0x7fa9, 0x7fb3, 0x7fbc, - 0x7fc4, 0x7fcb, 0x7fd1, 0x7fd7, 0x7fdc, 0x7fe0, 0x7fe4, 0x7fe7, 0x7fea, 0x7fed, 0x7fef, 0x7ff1, 0x7ff3, 0x7ff4, - 0x7ff6, 0x7ff7, 0x7ff8, 0x7ff9, 0x7ffa, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffc, 0x7ffd, 0x7ffd, 0x7ffd, 0x7ffe, 0x7ffe, - 0x7ffe, 0x7ffe, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, - 0x8001, 0x8001, 0x8001, 0x8002, 0x8002, 0x8002, 0x8002, 0x8003, 0x8003, 0x8003, 0x8004, 0x8004, 0x8005, 0x8006, - 0x8006, 0x8007, 0x8008, 0x8009, 0x800a, 0x800c, 0x800d, 0x800f, 0x8011, 0x8013, 0x8016, 0x8019, 0x801c, 0x8020, - 0x8024, 0x8029, 0x802f, 0x8035, 0x803c, 0x8044, 0x804d, 0x8057, 0x8062, 0x806f, 0x807e, 0x808f, 0x80a2, 0x80b8, - 0x80d0, 0x80ec, 0x810b, 0x812e, 0x8156, 0x8183, 0x81b7, 0x81f1, 0x8232, 0x827c, 0x82d0, 0x832f, 0x839a, 0x8412, - 0x849b, 0x8535, 0x85e2, 0x86a5, 0x8781, 0x8878, 0x898e, 0x8ac6, 0x8c24, 0x8dac, 0x8f62, 0x914b, 0x936b, 0x95c9, - 0x9869, 0x9b50, 0x9e84, 0xa20a, 0xa5e6, 0xaa1e, 0xaeb3, 0xb3aa, 0xb903, 0xbebe, 0xc4d9, 0xcb52, 0xd221, 0xd941, - 0xe0a7, 0xe847, 0xf015, 0xf803, -}; - -const q15_t tanhLTable_q15[128] = { - 0x0000, 0x0400, 0x07fd, 0x0bf7, 0x0feb, 0x13d7, 0x17b9, 0x1b90, 0x1f59, 0x2314, 0x26bf, 0x2a58, 0x2ddf, - 0x3151, 0x34ae, 0x37f6, 0x3b27, 0x3e40, 0x4142, 0x442c, 0x46fd, 0x49b6, 0x4c56, 0x4edd, 0x514d, 0x53a3, - 0x55e2, 0x580a, 0x5a1a, 0x5c13, 0x5df6, 0x5fc4, 0x617c, 0x6320, 0x64b0, 0x662d, 0x6797, 0x68f0, 0x6a37, - 0x6b6e, 0x6c95, 0x6dac, 0x6eb5, 0x6fb0, 0x709e, 0x717f, 0x7254, 0x731e, 0x73dc, 0x7490, 0x753a, 0x75da, - 0x7672, 0x7701, 0x7788, 0x7807, 0x787f, 0x78f0, 0x795b, 0x79bf, 0x7a1e, 0x7a77, 0x7acb, 0x7b1b, 0x849b, - 0x84e5, 0x8535, 0x8589, 0x85e2, 0x8641, 0x86a5, 0x8710, 0x8781, 0x87f9, 0x8878, 0x88ff, 0x898e, 0x8a26, - 0x8ac6, 0x8b70, 0x8c24, 0x8ce2, 0x8dac, 0x8e81, 0x8f62, 0x9050, 0x914b, 0x9254, 0x936b, 0x9492, 0x95c9, - 0x9710, 0x9869, 0x99d3, 0x9b50, 0x9ce0, 0x9e84, 0xa03c, 0xa20a, 0xa3ed, 0xa5e6, 0xa7f6, 0xaa1e, 0xac5d, - 0xaeb3, 0xb123, 0xb3aa, 0xb64a, 0xb903, 0xbbd4, 0xbebe, 0xc1c0, 0xc4d9, 0xc80a, 0xcb52, 0xceaf, 0xd221, - 0xd5a8, 0xd941, 0xdcec, 0xe0a7, 0xe470, 0xe847, 0xec29, 0xf015, 0xf409, 0xf803, 0xfc00, -}; - -const q15_t tanhHTable_q15[192] = { - 0x7b65, 0x7bee, 0x7c66, 0x7cd1, 0x7d30, 0x7d84, 0x7dce, 0x7e0f, 0x7e49, 0x7e7d, 0x7eaa, 0x7ed2, 0x7ef5, 0x7f14, - 0x7f30, 0x7f48, 0x7f5e, 0x7f71, 0x7f82, 0x7f91, 0x7f9e, 0x7fa9, 0x7fb3, 0x7fbc, 0x7fc4, 0x7fcb, 0x7fd1, 0x7fd7, - 0x7fdc, 0x7fe0, 0x7fe4, 0x7fe7, 0x7fea, 0x7fed, 0x7fef, 0x7ff1, 0x7ff3, 0x7ff4, 0x7ff6, 0x7ff7, 0x7ff8, 0x7ff9, - 0x7ffa, 0x7ffa, 0x7ffb, 0x7ffc, 0x7ffc, 0x7ffd, 0x7ffd, 0x7ffd, 0x7ffe, 0x7ffe, 0x7ffe, 0x7ffe, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, - 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x7fff, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, - 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8001, 0x8002, - 0x8002, 0x8002, 0x8002, 0x8003, 0x8003, 0x8003, 0x8004, 0x8004, 0x8005, 0x8006, 0x8006, 0x8007, 0x8008, 0x8009, - 0x800a, 0x800c, 0x800d, 0x800f, 0x8011, 0x8013, 0x8016, 0x8019, 0x801c, 0x8020, 0x8024, 0x8029, 0x802f, 0x8035, - 0x803c, 0x8044, 0x804d, 0x8057, 0x8062, 0x806f, 0x807e, 0x808f, 0x80a2, 0x80b8, 0x80d0, 0x80ec, 0x810b, 0x812e, - 0x8156, 0x8183, 0x81b7, 0x81f1, 0x8232, 0x827c, 0x82d0, 0x832f, 0x839a, 0x8412, -}; diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c deleted file mode 100644 index 6f2f575..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_no_shift.c +++ /dev/null @@ -1,121 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_q7_to_q15_no_shift.c - * Description: Converts the elements of the Q7 vector to Q15 vector without left-shift - * - * $Date: May 29, 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup nndata_convert - * @{ - */ - -/** - * @brief Converts the elements of the Q7 vector to Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * - * \par Description: - * - * The equation used for the conversion process is: - * - *
- * 	pDst[n] = (q15_t) pSrc[n];   0 <= n < blockSize.
- * 
- * - */ - -void arm_q7_to_q15_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize) -{ - const q7_t *pIn = pSrc; - uint32_t blkCnt; - -#if defined(ARM_MATH_DSP) - q31_t in; - q31_t in1, in2; - q31_t out1, out2; - - /*loop Unrolling */ - blkCnt = blockSize >> 2u; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. */ - while (blkCnt > 0u) - { - in = arm_nn_read_q7x4_ia(&pIn); - - /* rotatate in by 8 and extend two q7_t values to q15_t values */ - in1 = __SXTB16(__ROR((uint32_t)in, 8)); - - /* extend remaining two q7_t values to q15_t values */ - in2 = __SXTB16(in); - -#ifndef ARM_MATH_BIG_ENDIAN - out2 = (int32_t)__PKHTB(in1, in2, 16); - out1 = (int32_t)__PKHBT(in2, in1, 16); -#else - out1 = (int32_t)__PKHTB(in1, in2, 16); - out2 = (int32_t)__PKHBT(in2, in1, 16); -#endif - arm_nn_write_q15x2_ia(&pDst, out1); - arm_nn_write_q15x2_ia(&pDst, out2); - - /* Decrement the loop counter */ - blkCnt--; - } - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - blkCnt = blockSize % 0x4u; - -#else - - /* Run the below code for Cortex-M0 */ - - /* Loop over blockSize number of values */ - blkCnt = blockSize; - -#endif /* #ifndef ARM_MATH_CM0_FAMILY */ - - while (blkCnt > 0u) - { - /* convert from q7 to q15 and then store the results in the destination buffer */ - *pDst++ = (q15_t)*pIn++; - - /* Decrement the loop counter */ - blkCnt--; - } -} - -/** - * @} end of nndata_convert group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c deleted file mode 100644 index 8abbc3a..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_no_shift.c +++ /dev/null @@ -1,143 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_q7_to_q15_reordered_no_shift.c - * Description: Converts the elements of the Q7 vector to reordered Q15 vector without left-shift - * - * $Date: July 20, 2021 - * $Revision: V.1.1.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup nndata_convert - * @{ - */ - -/** - * @brief Converts the elements of the Q7 vector to reordered Q15 vector without left-shift - * @param[in] *pSrc points to the Q7 input vector - * @param[out] *pDst points to the Q15 output vector - * @param[in] blockSize length of the input vector - * - * @details - * - * This function does the q7 to q15 expansion with re-ordering - * - *
- *                          |   A1   |   A2   |   A3   |   A4   |
- *
- *                           0      7 8     15 16    23 24    31
- * 
- * - * is converted into: - * - *
- *  |       A1       |       A3       |   and  |       A2       |       A4       |
- *
- *   0             15 16            31          0             15 16            31
- * 
- * - * - * This looks strange but is natural considering how sign-extension is done at - * assembly level. - * - * The expansion of other other oprand will follow the same rule so that the end - * results are the same. - * - * The tail (i.e., last (N % 4) elements) will still be in original order. - * - */ - -void arm_q7_to_q15_reordered_no_shift(const q7_t *pSrc, q15_t *pDst, uint32_t blockSize) -{ - const q7_t *pIn = pSrc; /* Src pointer */ - uint32_t blkCnt; /* loop counter */ - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - q31_t in; - q31_t in1, in2; - - /* Run the below code for Cortex-M4 and Cortex-M3 */ - - /*loop Unrolling */ - blkCnt = blockSize >> 2u; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. - ** a second loop below computes the remaining 1 to 3 samples. */ - while (blkCnt > 0u) - { - /* C = (q15_t) A << 8 */ - /* convert from q7 to q15 and then store the results in the destination buffer */ - in = arm_nn_read_q7x4_ia(&pIn); - - /* rotatate in by 8 and extend two q7_t values to q15_t values */ - in1 = __SXTB16(__ROR((uint32_t)in, 8)); - - /* extend remainig two q7_t values to q15_t values */ - in2 = __SXTB16(in); - -#ifndef ARM_MATH_BIG_ENDIAN - arm_nn_write_q7x4_ia((q7_t **)&pDst, in2); - arm_nn_write_q7x4_ia((q7_t **)&pDst, in1); -#else - arm_nn_write_q7x4_ia((q7_t **)&pDst, in1); - arm_nn_write_q7x4_ia((q7_t **)&pDst, in2); -#endif - - /* Decrement the loop counter */ - blkCnt--; - } - - /* If the blockSize is not a multiple of 4, compute any remaining output samples here. - ** No loop unrolling is used. */ - blkCnt = blockSize % 0x4u; - -#else - - /* Run the below code for Cortex-M0 */ - - /* Loop over blockSize number of values */ - blkCnt = blockSize; - -#endif /* #ifndef ARM_MATH_CM0_FAMILY */ - - while (blkCnt > 0u) - { - /* C = (q15_t) A << 8 */ - /* convert from q7 to q15 and then store the results in the destination buffer */ - *pDst++ = (q15_t)*pIn++; - - /* Decrement the loop counter */ - blkCnt--; - } -} - -/** - * @} end of q7_to_x group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c deleted file mode 100644 index 765929d..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_reordered_with_offset.c +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_q7_to_q15_reordered_with_offset.c - * Description: Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset. The re-ordering - * is a signature of sign extension intrinsic(DSP extension). - * - * $Date: May 29, 2020 - * $Revision: V.2.0.3 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup nndata_convert - * @{ - */ - -/** - * @brief Converts the elements of the Q7 vector to a reordered Q15 vector with an added offset. - * - * @note Refer header file for details. - * - */ - -void arm_q7_to_q15_reordered_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset) -{ - -#if defined(ARM_MATH_DSP) - uint32_t block_cnt; - /* Run the below code for cores that support SIMD instructions */ - q31_t in_q7x4; - q31_t out_q15x2_1; - q31_t out_q15x2_2; - - /*loop unrolling */ - block_cnt = block_size >> 2u; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. */ - const q31_t offset_q15x2 = (q31_t)__PKHBT(offset, offset, 16); - while (block_cnt > 0u) - { - /* convert from q7 to q15 and then store the results in the destination buffer */ - in_q7x4 = arm_nn_read_q7x4_ia(&src); - - /* Extract and sign extend each of the four q7 values to q15 */ - out_q15x2_1 = __SXTAB16(offset_q15x2, __ROR((uint32_t)in_q7x4, 8)); - out_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4); - - arm_nn_write_q15x2_ia(&dst, out_q15x2_2); - arm_nn_write_q15x2_ia(&dst, out_q15x2_1); - - block_cnt--; - } - /* Handle left over samples */ - block_cnt = block_size % 0x4u; - - while (block_cnt > 0u) - { - *dst++ = (q15_t)*src++ + offset; - - /* Decrement the loop counter */ - block_cnt--; - } -#else - (void)src; - (void)dst; - (void)block_size; - (void)offset; - /* Not available */ -#endif -} - -/** - * @} end of nndata_convert group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c deleted file mode 100644 index ea29986..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/NNSupportFunctions/arm_q7_to_q15_with_offset.c +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in_q7x4 compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in_q7x4 writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_q7_to_q15_with_offset.c - * Description: Converts the elements of the Q7 vector to Q15 vector with an added offset - * - * $Date: March 3, 2020 - * $Revision: V.2.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup nndata_convert - * @{ - */ - -void arm_q7_to_q15_with_offset(const q7_t *src, q15_t *dst, uint32_t block_size, q15_t offset) -{ - int block_cnt; - -#if defined(ARM_MATH_MVEI) - - int16x8_t source; - const int16x8_t source_offset = vdupq_n_s16(offset); - block_cnt = block_size / 8; - - while (block_cnt > 0) - { - source = vldrbq_s16(src); - source = vaddq_s16(source, source_offset); - vstrhq_s16(dst, source); - dst += 8; - src += 8; - block_cnt--; - } - - block_cnt = block_size & 0x7; - -#elif defined(ARM_MATH_DSP) - /* Run the below code for cores that support SIMD instructions */ - q31_t in_q7x4; - q31_t in_q15x2_1; - q31_t in_q15x2_2; - q31_t out_q15x2_1; - q31_t out_q15x2_2; - - /*loop unrolling */ - block_cnt = block_size >> 2; - - /* First part of the processing with loop unrolling. Compute 4 outputs at a time. */ - const q31_t offset_q15x2 = __PKHBT(offset, offset, 16); - while (block_cnt > 0) - { - /* convert from q7 to q15 and then store the results in the destination buffer */ - in_q7x4 = arm_nn_read_q7x4_ia(&src); - - /* Extract and sign extend each of the four q7 values to q15 */ - in_q15x2_1 = __SXTAB16(offset_q15x2, __ROR(in_q7x4, 8)); - in_q15x2_2 = __SXTAB16(offset_q15x2, in_q7x4); - - out_q15x2_2 = __PKHTB(in_q15x2_1, in_q15x2_2, 16); - out_q15x2_1 = __PKHBT(in_q15x2_2, in_q15x2_1, 16); - - arm_nn_write_q15x2_ia(&dst, out_q15x2_1); - arm_nn_write_q15x2_ia(&dst, out_q15x2_2); - - block_cnt--; - } - /* Handle left over samples */ - block_cnt = block_size % 0x4; - -#else - /* Run the below code for Cortex-M0 */ - /* Loop over block_size number of values */ - block_cnt = block_size; -#endif - - while (block_cnt > 0) - { - *dst++ = (q15_t)*src++ + offset; - - /* Decrement the loop counter */ - block_cnt--; - } -} - -/** - * @} end of nndata_convert group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/CMakeLists.txt deleted file mode 100644 index a37503b..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -# -# Copyright (c) 2019-2022 Arm Limited. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -file(GLOB SRC "./*_s8.c") -file(GLOB SRC_S16 "./*_s16.c") -target_sources(cmsis-nn PRIVATE ${SRC} ${SRC_S16}) - - - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s16.c deleted file mode 100644 index 5cd2b1c..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s16.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - * Copyright (C) 2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_avgpool_s16.c - * Description: Pooling function implementations - * - * $Date: 3. February 2022 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Pooling - * @{ - */ - -/* - * s16 average pooling function - * - * Refer to header file for details. - * - */ -arm_status arm_avgpool_s16(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q15_t *src, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q15_t *dst) -{ - (void)ctx; - const int32_t input_y = input_dims->h; - const int32_t input_x = input_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_x = output_dims->w; - const int32_t stride_y = pool_params->stride.h; - const int32_t stride_x = pool_params->stride.w; - const int32_t kernel_y = filter_dims->h; - const int32_t kernel_x = filter_dims->w; - const int32_t pad_y = pool_params->padding.h; - const int32_t pad_x = pool_params->padding.w; - const int32_t act_min = pool_params->activation.min; - const int32_t act_max = pool_params->activation.max; - const int32_t ch_src = input_dims->c; - - /* Reference C code adapted from CMSIS-NN arm_avgpool_s8.c. - */ - - for (int i_y = 0, base_idx_y = -pad_y; i_y < output_y; base_idx_y += stride_y, i_y++) - { - for (int i_x = 0, base_idx_x = -pad_x; i_x < output_x; base_idx_x += stride_x, i_x++) - { - /* Condition for kernel start dimension: (base_idx_ + kernel__start) >= 0 */ - const int32_t ker_y_start = MAX(0, -base_idx_y); - const int32_t ker_x_start = MAX(0, -base_idx_x); - - /* Condition for kernel end dimension: (base_idx_ + kernel__end) < dim_src_ */ - const int32_t kernel_y_end = MIN(kernel_y, input_y - base_idx_y); - const int32_t kernel_x_end = MIN(kernel_x, input_x - base_idx_x); - - for (int i_ch_in = 0; i_ch_in < ch_src; i_ch_in++) - { - int sum = 0; - int count = 0; - - for (int k_y = ker_y_start; k_y < kernel_y_end; k_y++) - { - for (int k_x = ker_x_start; k_x < kernel_x_end; k_x++) - { - sum += src[i_ch_in + ch_src * (k_x + base_idx_x + (k_y + base_idx_y) * input_x)]; - count++; - } - } - - // Prevent static code issue DIVIDE_BY_ZERO. - if (count == 0) - { - return ARM_MATH_ARGUMENT_ERROR; - } - - sum = sum > 0 ? (sum + count / 2) / count : (sum - count / 2) / count; - sum = MAX(sum, act_min); - sum = MIN(sum, act_max); - - dst[i_ch_in + ch_src * (i_x + i_y * output_x)] = sum; - } - } - } - - return ARM_MATH_SUCCESS; -} - -int32_t arm_avgpool_s16_get_buffer_size(const int output_x, const int ch_src) -{ - (void)output_x; - (void)ch_src; - return 0; -} - -/** - * @} end of Pooling group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c deleted file mode 100644 index 3e9861e..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_avgpool_s8.c +++ /dev/null @@ -1,401 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_avgpool_s8.c - * Description: Pooling function implementations - * - * $Date: 01. March 2021 - * $Revision: V.2.0.4 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - -static void scale_q31_to_q7_and_clamp(const q31_t *buffer, - q7_t *target, - int32_t length, - const int32_t count, - const int act_min, - const int act_max) -{ - const int half_count = count / 2; - - // Prevent static code issue DIVIDE_BY_ZERO. - if (count == 0) - { - return; - } - - for (int i = 0; i < length; i++) - { - int32_t sum = buffer[i] > 0 ? (buffer[i] + half_count) : (buffer[i] - half_count); - sum = sum / count; - sum = MAX(sum, act_min); - sum = MIN(sum, act_max); - - target[i] = (q7_t)sum; - } -} -#endif - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Pooling - * @{ - */ - -/* - * s8 average pooling function - * - * Refer to header file for details. - * - */ - -#if defined(ARM_MATH_MVEI) - -arm_status arm_avgpool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *src, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *dst) -{ - (void)ctx; - const int32_t input_y = input_dims->h; - const int32_t input_x = input_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_x = output_dims->w; - const int32_t stride_y = pool_params->stride.h; - const int32_t stride_x = pool_params->stride.w; - const int32_t kernel_y = filter_dims->h; - const int32_t kernel_x = filter_dims->w; - const int32_t pad_y = pool_params->padding.h; - const int32_t pad_x = pool_params->padding.w; - const int32_t act_min = pool_params->activation.min; - const int32_t act_max = pool_params->activation.max; - const int32_t ch_src = input_dims->c; - - int32_t i_x, i_y; - int32_t k_x, k_y; - - for (i_y = 0; i_y < output_y; i_y++) - { - for (i_x = 0; i_x < output_x; i_x++) - { - - int32_t k_y_start, k_y_end; - int32_t k_x_start, k_x_end; - int32_t chCnt; - const int8_t *pTmp, *pTmpInner; - int8_t *pDst; - - k_y_start = MAX(0, i_y * stride_y - pad_y); - k_y_end = MIN(i_y * stride_y - pad_y + kernel_y, input_y); - - k_x_start = MAX(0, i_x * stride_x - pad_x); - k_x_end = MIN(i_x * stride_x - pad_x + kernel_x, input_x); - - pTmp = src; - pDst = &dst[ch_src * (i_x + i_y * output_x)]; - - chCnt = ch_src >> 4; - while (chCnt > 0) - { - int32x4_t sumV1, sumV2, sumV3, sumV4; - - int8x16_t tempV; - int16x8_t tempVLO, tempVHI; - int32x4_t tempVLOLO, tempVLOHI, tempVHILO, tempVHIHI; - int32_t count = 0; - - sumV1 = vdupq_n_s32(0); - sumV2 = vdupq_n_s32(0); - sumV3 = vdupq_n_s32(0); - sumV4 = vdupq_n_s32(0); - - for (k_y = k_y_start; k_y < k_y_end; k_y++) - { - for (k_x = k_x_start; k_x < k_x_end; k_x++) - { - pTmpInner = pTmp + (ch_src * (k_x + k_y * input_x)); - tempV = vldrbq_s8(pTmpInner); - - tempVLO = vmovlbq_s8(tempV); - tempVHI = vmovltq_s8(tempV); - - tempVLOLO = vmovlbq_s16(tempVLO); - tempVLOHI = vmovltq_s16(tempVLO); - - tempVHILO = vmovlbq_s16(tempVHI); - tempVHIHI = vmovltq_s16(tempVHI); - - sumV1 = vaddq_s32(sumV1, tempVLOLO); - sumV2 = vaddq_s32(sumV2, tempVLOHI); - sumV3 = vaddq_s32(sumV3, tempVHILO); - sumV4 = vaddq_s32(sumV4, tempVHIHI); - - count++; - } - } - - // Prevent static code issue DIVIDE_BY_ZERO. - if (count == 0) - { - return ARM_MATH_ARGUMENT_ERROR; - } - - sumV1[0] = sumV1[0] > 0 ? (sumV1[0] + count / 2) / count : (sumV1[0] - count / 2) / count; - sumV1[1] = sumV1[1] > 0 ? (sumV1[1] + count / 2) / count : (sumV1[1] - count / 2) / count; - sumV1[2] = sumV1[2] > 0 ? (sumV1[2] + count / 2) / count : (sumV1[2] - count / 2) / count; - sumV1[3] = sumV1[3] > 0 ? (sumV1[3] + count / 2) / count : (sumV1[3] - count / 2) / count; - - sumV2[0] = sumV2[0] > 0 ? (sumV2[0] + count / 2) / count : (sumV2[0] - count / 2) / count; - sumV2[1] = sumV2[1] > 0 ? (sumV2[1] + count / 2) / count : (sumV2[1] - count / 2) / count; - sumV2[2] = sumV2[2] > 0 ? (sumV2[2] + count / 2) / count : (sumV2[2] - count / 2) / count; - sumV2[3] = sumV2[3] > 0 ? (sumV2[3] + count / 2) / count : (sumV2[3] - count / 2) / count; - - sumV3[0] = sumV3[0] > 0 ? (sumV3[0] + count / 2) / count : (sumV3[0] - count / 2) / count; - sumV3[1] = sumV3[1] > 0 ? (sumV3[1] + count / 2) / count : (sumV3[1] - count / 2) / count; - sumV3[2] = sumV3[2] > 0 ? (sumV3[2] + count / 2) / count : (sumV3[2] - count / 2) / count; - sumV3[3] = sumV3[3] > 0 ? (sumV3[3] + count / 2) / count : (sumV3[3] - count / 2) / count; - - sumV4[0] = sumV4[0] > 0 ? (sumV4[0] + count / 2) / count : (sumV4[0] - count / 2) / count; - sumV4[1] = sumV4[1] > 0 ? (sumV4[1] + count / 2) / count : (sumV4[1] - count / 2) / count; - sumV4[2] = sumV4[2] > 0 ? (sumV4[2] + count / 2) / count : (sumV4[2] - count / 2) / count; - sumV4[3] = sumV4[3] > 0 ? (sumV4[3] + count / 2) / count : (sumV4[3] - count / 2) / count; - - sumV1 = vmaxq_s32(sumV1, vdupq_n_s32(act_min)); - sumV1 = vminq_s32(sumV1, vdupq_n_s32(act_max)); - - sumV2 = vmaxq_s32(sumV2, vdupq_n_s32(act_min)); - sumV2 = vminq_s32(sumV2, vdupq_n_s32(act_max)); - - sumV3 = vmaxq_s32(sumV3, vdupq_n_s32(act_min)); - sumV3 = vminq_s32(sumV3, vdupq_n_s32(act_max)); - - sumV4 = vmaxq_s32(sumV4, vdupq_n_s32(act_min)); - sumV4 = vminq_s32(sumV4, vdupq_n_s32(act_max)); - - tempVLO = vmovnbq_s32(tempVLO, sumV1); - tempVLO = vmovntq_s32(tempVLO, sumV2); - - tempVHI = vmovnbq_s32(tempVHI, sumV3); - tempVHI = vmovntq_s32(tempVHI, sumV4); - - tempV = vmovnbq_s16(tempV, tempVLO); - tempV = vmovntq_s16(tempV, tempVHI); - - vstrbq_s8(pDst, tempV); - pDst += 16; - - chCnt--; - pTmp += 16; - } - - chCnt = ch_src & 0xF; - while (chCnt > 0) - { - int32_t sum = 0; - int32_t count = 0; - - for (k_y = k_y_start; k_y < k_y_end; k_y++) - { - for (k_x = k_x_start; k_x < k_x_end; k_x++) - { - sum += pTmp[ch_src * (k_x + k_y * input_x)]; - count++; - } - } - - // Prevent static code issue DIVIDE_BY_ZERO. - if (count == 0) - { - return ARM_MATH_ARGUMENT_ERROR; - } - - sum = sum > 0 ? (sum + count / 2) / count : (sum - count / 2) / count; - sum = MAX(sum, act_min); - sum = MIN(sum, act_max); - - *pDst++ = sum; - - chCnt--; - pTmp++; - } - } - } - return ARM_MATH_SUCCESS; -} - -#else -arm_status arm_avgpool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *src, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *dst) -{ - const int32_t input_y = input_dims->h; - const int32_t input_x = input_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_x = output_dims->w; - const int32_t stride_y = pool_params->stride.h; - const int32_t stride_x = pool_params->stride.w; - const int32_t kernel_y = filter_dims->h; - const int32_t kernel_x = filter_dims->w; - const int32_t pad_y = pool_params->padding.h; - const int32_t pad_x = pool_params->padding.w; - const int32_t act_min = pool_params->activation.min; - const int32_t act_max = pool_params->activation.max; - const int32_t ch_src = input_dims->c; - - if (ctx->buf == NULL && arm_avgpool_s8_get_buffer_size(output_dims->w, input_dims->c)) - { - return ARM_MATH_ARGUMENT_ERROR; - } - q31_t *buffer = (q31_t *)ctx->buf; - -#if defined(ARM_MATH_DSP) - - /* Run the following code for CPU's with DSP extension - */ - for (int i_y = 0, idx_y = -pad_y; i_y < output_y; idx_y += stride_y, i_y++) - { - for (int i_x = 0, idx_x = -pad_x; i_x < output_x; idx_x += stride_x, i_x++) - { - /* Condition for kernel start dimension: - (base_idx_ + kernel__start) >= 0 */ - const int32_t kernel_y_start = MAX(0, -idx_y); - const int32_t kernel_x_start = MAX(0, -idx_x); - - /* Condition for kernel end dimension: - (base_idx_ + kernel__end) < dim_src_ */ - const int32_t kernel_y_end = MIN(kernel_y, input_y - idx_y); - const int32_t kernel_x_end = MIN(kernel_x, input_x - idx_x); - - int count = 0; - - for (int k_y = kernel_y_start; k_y < kernel_y_end; k_y++) - { - for (int k_x = kernel_x_start; k_x < kernel_x_end; k_x++) - { - const q7_t *start = src + ch_src * (k_x + idx_x + (k_y + idx_y) * input_x); - - if (count == 0) - { - for (int i = 0; i < ch_src; i++) - { - buffer[i] = start[i]; - } - } - else - { - for (int i = 0; i < ch_src; i++) - { - buffer[i] = __QADD(start[i], buffer[i]); - } - } - count++; - } - } - - // Prevent static code issue DIVIDE_BY_ZERO. - if (count == 0) - { - return ARM_MATH_ARGUMENT_ERROR; - } - - scale_q31_to_q7_and_clamp(buffer, dst, ch_src, count, act_min, act_max); - dst += ch_src; - } - } -#else - - /* Reference C code adapted from CMSIS-NN arm_avepool_q7_HWC. - */ - (void)buffer; - int16_t i_ch_in, i_x, i_y; - int16_t k_x, k_y; - - for (i_y = 0; i_y < output_y; i_y++) - { - for (i_x = 0; i_x < output_x; i_x++) - { - for (i_ch_in = 0; i_ch_in < ch_src; i_ch_in++) - { - int sum = 0; - int count = 0; - for (k_y = i_y * stride_y - pad_y; k_y < i_y * stride_y - pad_y + kernel_y; k_y++) - { - for (k_x = i_x * stride_x - pad_x; k_x < i_x * stride_x - pad_x + kernel_x; k_x++) - { - if (k_y >= 0 && k_x >= 0 && k_y < input_y && k_x < input_x) - { - sum += src[i_ch_in + ch_src * (k_x + k_y * input_x)]; - count++; - } - } - } - - // Prevent static code issue DIVIDE_BY_ZERO. - if (count == 0) - { - return ARM_MATH_ARGUMENT_ERROR; - } - - sum = sum > 0 ? (sum + count / 2) / count : (sum - count / 2) / count; - sum = MAX(sum, act_min); - sum = MIN(sum, act_max); - - dst[i_ch_in + ch_src * (i_x + i_y * output_x)] = sum; - } - } - } - -#endif - return ARM_MATH_SUCCESS; -} - -#endif /* ARM_MATH_MVEI */ - -int32_t arm_avgpool_s8_get_buffer_size(const int output_x, const int ch_src) -{ - (void)output_x; - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - return (ch_src * sizeof(int32_t)); -#else - (void)ch_src; - return 0; -#endif -} -/** - * @} end of Pooling group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s16.c deleted file mode 100644 index 483f874..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s16.c +++ /dev/null @@ -1,180 +0,0 @@ -/* - * Copyright (C) 2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_max_pool_s16.c - * Description: Pooling function implementations - * - * $Date: 24. January 2022 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -static void compare_and_replace_if_larger(int16_t *base, const int16_t *target, int32_t length) -{ - q15_t *dst = base; - const q15_t *src = target; - union arm_nnword ref_max; - union arm_nnword comp_max; - int32_t cnt = length >> 1; - - while (cnt > 0l) - { - ref_max.word = arm_nn_read_q15x2(dst); - comp_max.word = arm_nn_read_q15x2_ia(&src); - - if (comp_max.half_words[0] > ref_max.half_words[0]) - { - ref_max.half_words[0] = comp_max.half_words[0]; - } - if (comp_max.half_words[1] > ref_max.half_words[1]) - { - ref_max.half_words[1] = comp_max.half_words[1]; - } - - arm_nn_write_q15x2_ia(&dst, ref_max.word); - - cnt--; - } - - if (length & 0x1) - { - if (*src > *dst) - { - *dst = *src; - } - } -} - -static void clamp_output(int16_t *source, int32_t length, const int16_t act_min, const int16_t act_max) -{ - union arm_nnword in; - int32_t cnt = length >> 1; - - while (cnt > 0l) - { - in.word = arm_nn_read_q15x2(source); - - in.half_words[0] = MAX(in.half_words[0], act_min); - in.half_words[0] = MIN(in.half_words[0], act_max); - in.half_words[1] = MAX(in.half_words[1], act_min); - in.half_words[1] = MIN(in.half_words[1], act_max); - - arm_nn_write_q15x2_ia(&source, in.word); - cnt--; - } - - if (length & 0x1) - { - int16_t comp = *source; - comp = MAX(comp, act_min); - comp = MIN(comp, act_max); - *source = comp; - } -} - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Pooling - * @{ - */ - -/* - * Optimized s16 max pooling function - * - * Refer to header file for details. - * - */ - -arm_status arm_max_pool_s16(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const int16_t *src, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - int16_t *dst) -{ - const int32_t input_y = input_dims->h; - const int32_t input_x = input_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_x = output_dims->w; - const int32_t stride_y = pool_params->stride.h; - const int32_t stride_x = pool_params->stride.w; - const int32_t kernel_y = filter_dims->h; - const int32_t kernel_x = filter_dims->w; - const int32_t pad_y = pool_params->padding.h; - const int32_t pad_x = pool_params->padding.w; - const int16_t act_min = pool_params->activation.min; - const int16_t act_max = pool_params->activation.max; - const int32_t channel_in = input_dims->c; - (void)ctx; - int16_t *dst_base = dst; - - for (int i_y = 0, base_idx_y = -pad_y; i_y < output_y; base_idx_y += stride_y, i_y++) - { - for (int i_x = 0, base_idx_x = -pad_x; i_x < output_x; base_idx_x += stride_x, i_x++) - { - /* Condition for kernel start dimension: (base_idx_ + kernel__start) >= 0 */ - const int32_t ker_y_start = MAX(0, -base_idx_y); - const int32_t ker_x_start = MAX(0, -base_idx_x); - - /* Condition for kernel end dimension: (base_idx_ + kernel__end) < dim_src_ */ - const int32_t kernel_y_end = MIN(kernel_y, input_y - base_idx_y); - const int32_t kernel_x_end = MIN(kernel_x, input_x - base_idx_x); - - int count = 0; - - for (int k_y = ker_y_start; k_y < kernel_y_end; k_y++) - { - for (int k_x = ker_x_start; k_x < kernel_x_end; k_x++) - { - const int16_t *start = src + channel_in * (k_x + base_idx_x + (k_y + base_idx_y) * input_x); - - if (count == 0) - { - memcpy(dst, start, channel_in * sizeof(int16_t)); - count++; - } - else - { - compare_and_replace_if_larger(dst, start, channel_in); - } - } - } - /* 'count' is expected to be non-zero here. */ - dst += channel_in; - } - } - - clamp_output(dst_base, output_x * output_y * channel_in, act_min, act_max); - - return ARM_MATH_SUCCESS; -} - -/** - * @} end of Pooling group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c deleted file mode 100644 index 4fbbc91..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_max_pool_s8.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_max_pool_s8.c - * Description: Pooling function implementations - * - * $Date: 20. July 2021 - * $Revision: V.2.0.3 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -static void compare_and_replace_if_larger_q7(q7_t *base, const q7_t *target, int32_t length) -{ -#if defined(ARM_MATH_MVEI) - int32_t loop_count = (length + 15) / 16; - for (int i = 0; i < loop_count; i++) - { - mve_pred16_t p = vctp8q((uint32_t)length); - const int8x16_t op_1 = vldrbq_z_s8(base, p); - const int8x16_t op_2 = vldrbq_z_s8(target, p); - const int8x16_t max = vmaxq_m_s8(vuninitializedq_s8(), op_1, op_2, p); - vstrbq_p_s8(base, max, p); - base += 16; - target += 16; - length -= 16; - } -#else - q7_t *dst = base; - const q7_t *src = target; - union arm_nnword ref_max; - union arm_nnword comp_max; - int32_t cnt = length >> 2; - - while (cnt > 0l) - { - ref_max.word = arm_nn_read_q7x4(dst); - comp_max.word = arm_nn_read_q7x4_ia(&src); - - if (comp_max.bytes[0] > ref_max.bytes[0]) - { - ref_max.bytes[0] = comp_max.bytes[0]; - } - if (comp_max.bytes[1] > ref_max.bytes[1]) - { - ref_max.bytes[1] = comp_max.bytes[1]; - } - if (comp_max.bytes[2] > ref_max.bytes[2]) - { - ref_max.bytes[2] = comp_max.bytes[2]; - } - if (comp_max.bytes[3] > ref_max.bytes[3]) - { - ref_max.bytes[3] = comp_max.bytes[3]; - } - - arm_nn_write_q7x4_ia(&dst, ref_max.word); - - cnt--; - } - - cnt = length & 0x3; - while (cnt > 0l) - { - if (*src > *dst) - { - *dst = *src; - } - dst++; - src++; - cnt--; - } -#endif -} - -static void clamp_output(q7_t *source, int32_t length, const int32_t act_min, const int32_t act_max) -{ -#if defined(ARM_MATH_MVEI) - int32_t loop_count = (length + 15) / 16; - for (int i = 0; i < loop_count; i++) - { - mve_pred16_t p = vctp8q((uint32_t)length); - length -= 16; - const int8x16_t src = vldrbq_z_s8(source, p); - const int8x16_t predicated_min = vdupq_m_n_s8(vuninitializedq_s8(), (int8_t)act_min, p); - const int8x16_t predicated_max = vdupq_m_n_s8(vuninitializedq_s8(), (int8_t)act_max, p); - int8x16_t res = vmaxq_m_s8(vuninitializedq_s8(), src, predicated_min, p); - res = vminq_m_s8(vuninitializedq_s8(), res, predicated_max, p); - vstrbq_p_s8(source, res, p); - source += 16; - } -#else - union arm_nnword in; - int32_t cnt = length >> 2; - - while (cnt > 0l) - { - in.word = arm_nn_read_q7x4(source); - - in.bytes[0] = MAX(in.bytes[0], act_min); - in.bytes[0] = MIN(in.bytes[0], act_max); - in.bytes[1] = MAX(in.bytes[1], act_min); - in.bytes[1] = MIN(in.bytes[1], act_max); - in.bytes[2] = MAX(in.bytes[2], act_min); - in.bytes[2] = MIN(in.bytes[2], act_max); - in.bytes[3] = MAX(in.bytes[3], act_min); - in.bytes[3] = MIN(in.bytes[3], act_max); - - arm_nn_write_q7x4_ia(&source, in.word); - cnt--; - } - - cnt = length & 0x3; - while (cnt > 0l) - { - int32_t comp = *source; - comp = MAX(comp, act_min); - comp = MIN(comp, act_max); - *source++ = (int8_t)comp; - cnt--; - } -#endif -} - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Pooling - * @{ - */ - -/* - * Optimized s8 max pooling function - * - * Refer to header file for details. - * - */ - -arm_status arm_max_pool_s8(const cmsis_nn_context *ctx, - const cmsis_nn_pool_params *pool_params, - const cmsis_nn_dims *input_dims, - const q7_t *src, - const cmsis_nn_dims *filter_dims, - const cmsis_nn_dims *output_dims, - q7_t *dst) -{ - const int32_t input_y = input_dims->h; - const int32_t input_x = input_dims->w; - const int32_t output_y = output_dims->h; - const int32_t output_x = output_dims->w; - const int32_t stride_y = pool_params->stride.h; - const int32_t stride_x = pool_params->stride.w; - const int32_t kernel_y = filter_dims->h; - const int32_t kernel_x = filter_dims->w; - const int32_t pad_y = pool_params->padding.h; - const int32_t pad_x = pool_params->padding.w; - const int32_t act_min = pool_params->activation.min; - const int32_t act_max = pool_params->activation.max; - const int32_t channel_in = input_dims->c; - (void)ctx; - q7_t *dst_base = dst; - - for (int i_y = 0, base_idx_y = -pad_y; i_y < output_y; base_idx_y += stride_y, i_y++) - { - for (int i_x = 0, base_idx_x = -pad_x; i_x < output_x; base_idx_x += stride_x, i_x++) - { - /* Condition for kernel start dimension: (base_idx_ + kernel__start) >= 0 */ - const int32_t ker_y_start = MAX(0, -base_idx_y); - const int32_t ker_x_start = MAX(0, -base_idx_x); - - /* Condition for kernel end dimension: (base_idx_ + kernel__end) < dim_src_ */ - const int32_t kernel_y_end = MIN(kernel_y, input_y - base_idx_y); - const int32_t kernel_x_end = MIN(kernel_x, input_x - base_idx_x); - - int count = 0; - - for (int k_y = ker_y_start; k_y < kernel_y_end; k_y++) - { - for (int k_x = ker_x_start; k_x < kernel_x_end; k_x++) - { - const q7_t *start = src + channel_in * (k_x + base_idx_x + (k_y + base_idx_y) * input_x); - - if (count == 0) - { - arm_memcpy_q7(dst, start, channel_in); - count++; - } - else - { - compare_and_replace_if_larger_q7(dst, start, channel_in); - } - } - } - /* 'count' is expected to be non-zero here. */ - dst += channel_in; - } - } - - clamp_output(dst_base, output_x * output_y * channel_in, act_min, act_max); - - return ARM_MATH_SUCCESS; -} - -/** - * @} end of Pooling group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c deleted file mode 100644 index 5a3b1af..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/PoolingFunctions/arm_pool_q7_HWC.c +++ /dev/null @@ -1,464 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_pool_q7_HWC.c - * Description: Pooling function implementations - * - * $Date: 20. July 2021 - * $Revision: V.1.1.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - -/** - * @brief A few utility functions used by pooling functions - * - * - */ - -static void buffer_scale_back_q15_to_q7(q15_t *buffer, q7_t *target, uint16_t length, uint16_t scale) -{ - int i; - - for (i = 0; i < length; i++) - { - target[i] = (q7_t)(buffer[i] / scale); - } -} - -static void compare_and_replace_if_larger_q7(q7_t *base, // base data - const q7_t *target, // compare target - const uint16_t length // data length -) -{ - q7_t *pIn = base; - const q7_t *pCom = target; - union arm_nnword in; - union arm_nnword com; - uint16_t cnt = length >> 2; - - while (cnt > 0u) - { - in.word = arm_nn_read_q7x4((const q7_t *)pIn); - com.word = arm_nn_read_q7x4_ia((const q7_t **)&pCom); - - // if version - if (com.bytes[0] > in.bytes[0]) - in.bytes[0] = com.bytes[0]; - if (com.bytes[1] > in.bytes[1]) - in.bytes[1] = com.bytes[1]; - if (com.bytes[2] > in.bytes[2]) - in.bytes[2] = com.bytes[2]; - if (com.bytes[3] > in.bytes[3]) - in.bytes[3] = com.bytes[3]; - - arm_nn_write_q7x4_ia(&pIn, in.word); - - cnt--; - } - - cnt = length & 0x3; - while (cnt > 0u) - { - if (*pCom > *pIn) - { - *pIn = *pCom; - } - pIn++; - pCom++; - cnt--; - } -} - -static void accumulate_q7_to_q15(q15_t *base, q7_t *target, const uint16_t length) -{ - q15_t *pCnt = base; - q7_t *pV = target; - q31_t v1, v2, vo1, vo2; - uint16_t cnt = length >> 2; - q31_t in; - - while (cnt > 0u) - { - q31_t value = arm_nn_read_q7x4_ia((const q7_t **)&pV); - v1 = __SXTB16(__ROR(value, 8)); - v2 = __SXTB16(value); -#ifndef ARM_MATH_BIG_ENDIAN - - vo2 = __PKHTB(v1, v2, 16); - vo1 = __PKHBT(v2, v1, 16); - -#else - - vo1 = __PKHTB(v1, v2, 16); - vo2 = __PKHBT(v2, v1, 16); - -#endif - - in = arm_nn_read_q15x2(pCnt); - arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo1, in)); - - in = arm_nn_read_q15x2(pCnt); - arm_nn_write_q15x2_ia(&pCnt, __QADD16(vo2, in)); - - cnt--; - } - cnt = length & 0x3; - while (cnt > 0u) - { - *pCnt++ += *pV++; - cnt--; - } -} - -#endif // ARM_MATH_DSP - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Pooling - * @{ - */ - -/** - * @brief Q7 max pooling function - * @param[in, out] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA Not used - * @param[in,out] Im_out pointer to output tensor - * - * @details - * - * The pooling function is implemented as split x-pooling then - * y-pooling. - * - * This pooling function is input-destructive. Input data is undefined - * after calling this function. - * - */ - -void arm_maxpool_q7_HWC(q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t *bufferA, - q7_t *Im_out) -{ - (void)bufferA; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - int16_t i_x, i_y; - - /* first does the pooling along x axis */ - for (i_y = 0; i_y < dim_im_in; i_y++) - { - - for (i_x = 0; i_x < dim_im_out; i_x++) - { - /* for each output pixel */ - q7_t *target = Im_in + (i_y * dim_im_in + i_x) * ch_im_in; - q7_t *win_start; - q7_t *win_stop; - if (i_x * stride - padding < 0) - { - win_start = target; - } - else - { - win_start = Im_in + (i_y * dim_im_in + i_x * stride - padding) * ch_im_in; - } - - if (i_x * stride - padding + dim_kernel >= dim_im_in) - { - win_stop = Im_in + (i_y * dim_im_in + dim_im_in) * ch_im_in; - } - else - { - win_stop = Im_in + (i_y * dim_im_in + i_x * stride - padding + dim_kernel) * ch_im_in; - } - - /* first step is to copy over initial data */ - /* arm_copy_q7(win_start, target, ch_im_in); */ - memmove(target, win_start, ch_im_in); - - /* start the max operation from the second part */ - win_start += ch_im_in; - for (; win_start < win_stop; win_start += ch_im_in) - { - compare_and_replace_if_larger_q7(target, win_start, ch_im_in); - } - } - } - - /* then does the pooling along y axis */ - for (i_y = 0; i_y < dim_im_out; i_y++) - { - - /* for each output row */ - q7_t *target = Im_out + i_y * dim_im_out * ch_im_in; - q7_t *row_start; - q7_t *row_end; - /* setting the starting row */ - if (i_y * stride - padding < 0) - { - row_start = Im_in; - } - else - { - row_start = Im_in + (i_y * stride - padding) * dim_im_in * ch_im_in; - } - /* setting the stopping row */ - if (i_y * stride - padding + dim_kernel >= dim_im_in) - { - row_end = Im_in + dim_im_in * dim_im_in * ch_im_in; - } - else - { - row_end = Im_in + (i_y * stride - padding + dim_kernel) * dim_im_in * ch_im_in; - } - - /* copy over the first row */ - /* arm_copy_q7(row_start, target, dim_im_out * ch_im_in); */ - memmove(target, row_start, dim_im_out * ch_im_in); - - /* move over to next row */ - row_start += ch_im_in * dim_im_in; - - for (; row_start < row_end; row_start += dim_im_in * ch_im_in) - { - compare_and_replace_if_larger_q7(target, row_start, dim_im_out * ch_im_in); - } - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - int16_t i_ch_in, i_x, i_y; - int16_t k_x, k_y; - - for (i_ch_in = 0; i_ch_in < ch_im_in; i_ch_in++) - { - for (i_y = 0; i_y < dim_im_out; i_y++) - { - for (i_x = 0; i_x < dim_im_out; i_x++) - { - int max = -129; - for (k_y = i_y * stride - padding; k_y < i_y * stride - padding + dim_kernel; k_y++) - { - for (k_x = i_x * stride - padding; k_x < i_x * stride - padding + dim_kernel; k_x++) - { - if (k_y >= 0 && k_x >= 0 && k_y < dim_im_in && k_x < dim_im_in) - { - if (Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)] > max) - { - max = Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)]; - } - } - } - } - Im_out[i_ch_in + ch_im_in * (i_x + i_y * dim_im_out)] = max; - } - } - } - -#endif /* ARM_MATH_DSP */ -} - -/** - * @brief Q7 average pooling function - * @param[in,out] Im_in pointer to input tensor - * @param[in] dim_im_in input tensor dimention - * @param[in] ch_im_in number of input tensor channels - * @param[in] dim_kernel filter kernel size - * @param[in] padding padding sizes - * @param[in] stride convolution stride - * @param[in] dim_im_out output tensor dimension - * @param[in,out] bufferA pointer to buffer space for input - * @param[in,out] Im_out pointer to output tensor - * - * @details - * - * Buffer size: - * - * bufferA size: 2*dim_im_out*ch_im_in - * - * The pooling function is implemented as split x-pooling then - * y-pooling. - * - * This pooling function is input-destructive. Input data is undefined - * after calling this function. - * - */ - -void arm_avepool_q7_HWC(q7_t *Im_in, - const uint16_t dim_im_in, - const uint16_t ch_im_in, - const uint16_t dim_kernel, - const uint16_t padding, - const uint16_t stride, - const uint16_t dim_im_out, - q7_t *bufferA, - q7_t *Im_out) -{ - -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - /* Run the following code for Cortex-M4 and Cortex-M7 */ - - q15_t *buffer = (q15_t *)bufferA; - int16_t i_x, i_y; - int16_t count = 0; - - /* first does the pooling along x axis */ - for (i_y = 0; i_y < dim_im_in; i_y++) - { - - for (i_x = 0; i_x < dim_im_out; i_x++) - { - /* for each output pixel */ - q7_t *target = Im_in + (i_y * dim_im_in + i_x) * ch_im_in; - q7_t *win_start; - q7_t *win_stop; - if (i_x * stride - padding < 0) - { - win_start = target; - } - else - { - win_start = Im_in + (i_y * dim_im_in + i_x * stride - padding) * ch_im_in; - } - - if (i_x * stride - padding + dim_kernel >= dim_im_in) - { - win_stop = Im_in + (i_y * dim_im_in + dim_im_in) * ch_im_in; - } - else - { - win_stop = Im_in + (i_y * dim_im_in + i_x * stride - padding + dim_kernel) * ch_im_in; - } - - /* first step is to copy over initial data */ - arm_q7_to_q15_no_shift(win_start, buffer, ch_im_in); - count = 1; - - /* start the max operation from the second part */ - win_start += ch_im_in; - for (; win_start < win_stop; win_start += ch_im_in) - { - accumulate_q7_to_q15(buffer, win_start, ch_im_in); - count++; - } - buffer_scale_back_q15_to_q7(buffer, target, ch_im_in, count); - } - } - - /* then does the pooling along y axis */ - for (i_y = 0; i_y < dim_im_out; i_y++) - { - /* for each output row */ - q7_t *target = Im_out + i_y * dim_im_out * ch_im_in; - q7_t *row_start; - q7_t *row_end; - /* setting the starting row */ - if (i_y * stride - padding < 0) - { - row_start = Im_in; - } - else - { - row_start = Im_in + (i_y * stride - padding) * dim_im_in * ch_im_in; - } - /* setting the stopping row */ - if (i_y * stride - padding + dim_kernel >= dim_im_in) - { - row_end = Im_in + dim_im_in * dim_im_in * ch_im_in; - } - else - { - row_end = Im_in + (i_y * stride - padding + dim_kernel) * dim_im_in * ch_im_in; - } - - /* copy over the first row */ - arm_q7_to_q15_no_shift(row_start, buffer, dim_im_out * ch_im_in); - count = 1; - - /* move over to next row */ - row_start += ch_im_in * dim_im_in; - - for (; row_start < row_end; row_start += dim_im_in * ch_im_in) - { - accumulate_q7_to_q15(buffer, row_start, dim_im_out * ch_im_in); - count++; - } - buffer_scale_back_q15_to_q7(buffer, target, dim_im_out * ch_im_in, count); - } - -#else - /* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */ - - (void)bufferA; - int16_t i_ch_in, i_x, i_y; - int16_t k_x, k_y; - - for (i_ch_in = 0; i_ch_in < ch_im_in; i_ch_in++) - { - for (i_y = 0; i_y < dim_im_out; i_y++) - { - for (i_x = 0; i_x < dim_im_out; i_x++) - { - int sum = 0; - int count = 0; - for (k_y = i_y * stride - padding; k_y < i_y * stride - padding + dim_kernel; k_y++) - { - for (k_x = i_x * stride - padding; k_x < i_x * stride - padding + dim_kernel; k_x++) - { - if (k_y >= 0 && k_x >= 0 && k_y < dim_im_in && k_x < dim_im_in) - { - sum += Im_in[i_ch_in + ch_im_in * (k_x + k_y * dim_im_in)]; - count++; - } - } - } - Im_out[i_ch_in + ch_im_in * (i_x + i_y * dim_im_out)] = sum / count; - } - } - } - -#endif /* ARM_MATH_DSP */ -} - -/** - * @} end of Pooling group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ReshapeFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ReshapeFunctions/CMakeLists.txt deleted file mode 100644 index 4866a9a..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ReshapeFunctions/CMakeLists.txt +++ /dev/null @@ -1,29 +0,0 @@ -# -# Copyright (c) 2019-2021 Arm Limited. All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -project(CMSISNNReshape) - -file(GLOB SRC "./*_*.c") - -add_library(CMSISNNReshape STATIC ${SRC}) - -### Includes -target_include_directories(CMSISNNReshape PUBLIC "${NN}/Include") -target_include_directories(CMSISNNReshape PUBLIC "${ROOT}/CMSIS/Core/Include") -target_include_directories(CMSISNNReshape PUBLIC "${ROOT}/CMSIS/DSP/Include") - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c deleted file mode 100644 index cd839dc..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/ReshapeFunctions/arm_reshape_s8.c +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_reshape_s8.c - * Description: Reshape a s8 vector - * - * $Date: September 2019 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Reshape - * @{ - */ - -/** - * Basic s8 reshape function. - * - * Refer header file for details. - * - */ - -void arm_reshape_s8(const int8_t *input, int8_t *output, const uint32_t total_size) -{ - memcpy(output, input, total_size); -} - -/** - * @} end of Reshape group - */ \ No newline at end of file diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/CMakeLists.txt deleted file mode 100644 index b3adff6..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/CMakeLists.txt +++ /dev/null @@ -1,31 +0,0 @@ -# -# Copyright (c) 2019-2021 Arm Limited. All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -project(CMSISNNSVDF) - -file(GLOB SRC "./*_s8.c") - -add_library(CMSISNNSVDF STATIC ${SRC}) - -### Includes -target_include_directories(CMSISNNSVDF PUBLIC "${NN}/Include") -target_include_directories(CMSISNNSVDF PUBLIC "${ROOT}/CMSIS/Core/Include") -target_include_directories(CMSISNNSVDF PUBLIC "${ROOT}/CMSIS/DSP/Include") - - - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c deleted file mode 100644 index c8bed03..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/arm_svdf_s8.c +++ /dev/null @@ -1,254 +0,0 @@ -/* - * Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_svdf_s8.c - * Description: S8 basic SVDF layer function - * - * $Date: 15. April 2021 - * $Revision: V.1.5.0 - * - * Target Processor: Cortex-M processors - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup SVDF - * @{ - */ - -/* - * S8 SVDF layer function for TensorFlow Lite - * - * Refer to header file for details. - * - */ - -arm_status arm_svdf_s8(const cmsis_nn_context *input_ctx, - const cmsis_nn_context *output_ctx, - const cmsis_nn_svdf_params *svdf_params, - const cmsis_nn_per_tensor_quant_params *input_quant_params, - const cmsis_nn_per_tensor_quant_params *output_quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *state_dims, - q15_t *state_data, - const cmsis_nn_dims *weights_feature_dims, - const q7_t *weights_feature_data, - const cmsis_nn_dims *weights_time_dims, - const q15_t *weights_time_data, - const cmsis_nn_dims *bias_dims, - const q31_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data) -{ - (void)bias_dims; - (void)state_dims; - (void)output_dims; - - const q31_t multiplier_in = input_quant_params->multiplier; - const q31_t shift_in = input_quant_params->shift; - const q31_t multiplier_out = output_quant_params->multiplier; - const q31_t shift_2 = output_quant_params->shift; - const int32_t zp_in = svdf_params->input_offset; - const int32_t zp_out = svdf_params->output_offset; - const int32_t in_activation_min = svdf_params->input_activation.min; - const int32_t in_activation_max = svdf_params->input_activation.max; - const int32_t out_activation_min = svdf_params->output_activation.min; - const int32_t out_activation_max = svdf_params->output_activation.max; - const int16_t rank = svdf_params->rank; - - const int32_t input_batches = input_dims->n; - const int32_t input_height = input_dims->h; - const int32_t feature_batches = weights_feature_dims->n; - const int32_t time_batches = weights_time_dims->h; - const int32_t unit_count = feature_batches / rank; - - q31_t *buffer_a = (q31_t *)input_ctx->buf; - q31_t *buffer_b = (q31_t *)output_ctx->buf; - - memmove((q15_t *)state_data, - (q15_t *)state_data + 1, - (size_t)(input_batches * feature_batches * time_batches * (int32_t)sizeof(int16_t))); - - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - q15_t *res_ptr = state_data + (time_batches * i_batch * feature_batches) + (time_batches - 1); - const q7_t *weight = weights_feature_data; - const q7_t *input = input_data + i_batch * input_height; - - arm_status res = arm_nn_vec_mat_mult_t_svdf_s8(input, - weight, - res_ptr, - -zp_in, - 0, - time_batches, - multiplier_in, - shift_in, - input_height, - feature_batches, - in_activation_min, - in_activation_max); - - if (res != ARM_MATH_SUCCESS) - { - return res; - } - } - - { - q31_t *ptr_a = buffer_a; - const q15_t *v2 = state_data; - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - const q15_t *v1 = weights_time_data; - - for (int i_feature_batch = 0; i_feature_batch < feature_batches; i_feature_batch++) - { - *ptr_a = 0; - int32_t sum = 0; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - int j = 0; - int32_t block_count = time_batches >> 1; - for (int i = 0; i < block_count; i++) - { - j += 2; - q31_t r1 = arm_nn_read_q15x2_ia(&v1); - q31_t r2 = arm_nn_read_q15x2_ia(&v2); - - sum = __SMLAD(r1, r2, sum); - } - - // Process the remaining data - for (; j < time_batches; j++) - { - sum += *v1 * *v2; - v1++; - v2++; - } -#else - for (int j = 0; j < time_batches; j++) - { - sum += *v1 * *v2; - v1++; - v2++; - } -#endif - - *ptr_a = sum; - ptr_a++; - } - } - } - - if (bias_data) - { - if (unit_count == feature_batches) - { - for (int i = 0; i < input_batches; i++) - { - q31_t *output_temp = buffer_b + i * feature_batches; - const q31_t *ptr_a = buffer_a + i * feature_batches; - - const int32_t *bi = bias_data; - for (int j = 0; j < feature_batches; j++) - { - output_temp[j] = ptr_a[j] + bi[j]; - } - } - } - else - { - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - q31_t *output_data_temp = buffer_b + i_batch * unit_count; - q31_t *ptr_a = buffer_a + i_batch * feature_batches; - - for (int i = 0; i < unit_count; i++) - { - int32_t sum = bias_data[i]; - for (int j = 0; j < rank; j++) - { - sum += *ptr_a; - ptr_a++; - } - output_data_temp[i] = sum; - } - } - } - } - else - { - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - q31_t *output_data_temp = buffer_b + i_batch * unit_count; - q31_t *ptr_a = buffer_a + i_batch * feature_batches; - - for (int i = 0; i < unit_count; i++) - { - int32_t sum = 0; - for (int j = 0; j < rank; j++) - { - sum += *ptr_a; - ptr_a++; - } - output_data_temp[i] = sum; - } - } - } - -#if defined(ARM_MATH_MVEI) - int32_t num_elements = input_batches * unit_count; - const int32_t loop_count = (num_elements + 3) / 4; - for (int i_op = 0; i_op < loop_count; i_op++) - { - mve_pred16_t p = vctp32q((uint32_t)num_elements); - int32x4_t op = vldrwq_z_s32(buffer_b, p); - op = arm_requantize_mve(op, multiplier_out, shift_2); - op = vaddq_n_s32(op, zp_out); - const int32x4_t min_vec = vdupq_n_s32((int8_t)out_activation_min); - const int32x4_t max_vec = vdupq_n_s32((int8_t)out_activation_max); - op = vmaxq_s32(op, min_vec); - op = vminq_s32(op, max_vec); - vstrbq_p_s32(output_data, op, p); - output_data += 4; - buffer_b += 4; - num_elements -= 4; - } -#else - for (int i = 0; i < input_batches * unit_count; i++) - { - output_data[i] = (q7_t)CLAMP( - arm_nn_requantize(buffer_b[i], multiplier_out, shift_2) + zp_out, out_activation_max, out_activation_min); - } -#endif - - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of SVDF group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c deleted file mode 100644 index 988409b..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SVDFunctions/arm_svdf_state_s16_s8.c +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (C) 2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_svdf_s8.c - * Description: S8 basic SVDF layer function with s16 state tensor - * - * $Date: 28 April 2022 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M processors - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup SVDF - * @{ - */ - -/* - * S8 SVDF layer function for TensorFlow Lite with 16 bit state tensor - * - * Refer to header file for details. - * - */ - -arm_status arm_svdf_state_s16_s8(const cmsis_nn_context *input_ctx, - const cmsis_nn_context *output_ctx, - const cmsis_nn_svdf_params *svdf_params, - const cmsis_nn_per_tensor_quant_params *input_quant_params, - const cmsis_nn_per_tensor_quant_params *output_quant_params, - const cmsis_nn_dims *input_dims, - const q7_t *input_data, - const cmsis_nn_dims *state_dims, - q15_t *state_data, - const cmsis_nn_dims *weights_feature_dims, - const q7_t *weights_feature_data, - const cmsis_nn_dims *weights_time_dims, - const q15_t *weights_time_data, - const cmsis_nn_dims *bias_dims, - const q31_t *bias_data, - const cmsis_nn_dims *output_dims, - q7_t *output_data) -{ - (void)bias_dims; - (void)state_dims; - (void)output_dims; - - const q31_t multiplier_in = input_quant_params->multiplier; - const q31_t shift_in = input_quant_params->shift; - const q31_t multiplier_out = output_quant_params->multiplier; - const q31_t shift_2 = output_quant_params->shift; - const int32_t zp_in = svdf_params->input_offset; - const int32_t zp_out = svdf_params->output_offset; - const int32_t in_activation_min = svdf_params->input_activation.min; - const int32_t in_activation_max = svdf_params->input_activation.max; - const int32_t out_activation_min = svdf_params->output_activation.min; - const int32_t out_activation_max = svdf_params->output_activation.max; - const int16_t rank = svdf_params->rank; - - const int32_t input_batches = input_dims->n; - const int32_t input_height = input_dims->h; - const int32_t feature_batches = weights_feature_dims->n; - const int32_t time_batches = weights_time_dims->h; - const int32_t unit_count = feature_batches / rank; - - if (input_ctx->buf == NULL) - { - return ARM_MATH_ARGUMENT_ERROR; - } - q31_t *buffer_a = (q31_t *)input_ctx->buf; - - if (output_ctx->buf == NULL) - { - return ARM_MATH_ARGUMENT_ERROR; - } - q31_t *buffer_b = (q31_t *)output_ctx->buf; - - // Left shift state - memmove((q15_t *)state_data, - (q15_t *)state_data + 1, - (size_t)((input_batches * feature_batches * time_batches - 1) * (int32_t)sizeof(int16_t))); - - // Matrix multiplication input * feature weight - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - q15_t *res_ptr = state_data + (time_batches * i_batch * feature_batches) + (time_batches - 1); - const q7_t *weight = weights_feature_data; - const q7_t *input = input_data + i_batch * input_height; - - arm_status res = arm_nn_vec_mat_mult_t_svdf_s8(input, - weight, - res_ptr, - -zp_in, - 0, - time_batches, - multiplier_in, - shift_in, - input_height, - feature_batches, - in_activation_min, - in_activation_max); - - if (res != ARM_MATH_SUCCESS) - { - return res; - } - } - - { - // Matrix multiplication time weight * state tensors - q31_t *ptr_a = buffer_a; - const q15_t *v2 = state_data; - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - const q15_t *v1 = weights_time_data; - - for (int i_feature_batch = 0; i_feature_batch < feature_batches; i_feature_batch++) - { - *ptr_a = 0; - int32_t sum = 0; -#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI) - // Perform matrix multiplication in blocks of two - int j = 0; - int32_t block_count = time_batches >> 1; - for (int i = 0; i < block_count; i++) - { - j += 2; - q31_t r1 = arm_nn_read_q15x2_ia(&v1); - q31_t r2 = arm_nn_read_q15x2_ia(&v2); - - sum = __SMLAD(r1, r2, sum); - } - - // Process the remaining data - for (; j < time_batches; j++) - { - sum += *v1 * *v2; - v1++; - v2++; - } -#else - for (int j = 0; j < time_batches; j++) - { - sum += *v1 * *v2; - v1++; - v2++; - } -#endif - - *ptr_a = sum; - ptr_a++; - } - } - } - - if (bias_data) - { - if (unit_count == feature_batches) - { - for (int i = 0; i < input_batches; i++) - { - q31_t *output_temp = buffer_b + i * feature_batches; - const q31_t *ptr_a = buffer_a + i * feature_batches; - - const int32_t *bi = bias_data; - for (int j = 0; j < feature_batches; j++) - { - output_temp[j] = ptr_a[j] + bi[j]; - } - } - } - else - { - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - q31_t *output_data_temp = buffer_b + i_batch * unit_count; - q31_t *ptr_a = buffer_a + i_batch * feature_batches; - - for (int i = 0; i < unit_count; i++) - { - int32_t sum = bias_data[i]; - for (int j = 0; j < rank; j++) - { - sum += *ptr_a; - ptr_a++; - } - output_data_temp[i] = sum; - } - } - } - } - else - { - for (int i_batch = 0; i_batch < input_batches; i_batch++) - { - q31_t *output_data_temp = buffer_b + i_batch * unit_count; - q31_t *ptr_a = buffer_a + i_batch * feature_batches; - - for (int i = 0; i < unit_count; i++) - { - int32_t sum = 0; - for (int j = 0; j < rank; j++) - { - sum += *ptr_a; - ptr_a++; - } - output_data_temp[i] = sum; - } - } - } - -#if defined(ARM_MATH_MVEI) - int32_t num_elements = input_batches * unit_count; - const int32_t loop_count = (num_elements + 3) / 4; - for (int i_op = 0; i_op < loop_count; i_op++) - { - mve_pred16_t p = vctp32q((uint32_t)num_elements); - int32x4_t op = vldrwq_z_s32(buffer_b, p); - op = arm_requantize_mve(op, multiplier_out, shift_2); - op = vaddq_n_s32(op, zp_out); - const int32x4_t min_vec = vdupq_n_s32((int8_t)out_activation_min); - const int32x4_t max_vec = vdupq_n_s32((int8_t)out_activation_max); - op = vmaxq_s32(op, min_vec); - op = vminq_s32(op, max_vec); - vstrbq_p_s32(output_data, op, p); - output_data += 4; - buffer_b += 4; - num_elements -= 4; - } -#else - for (int i = 0; i < input_batches * unit_count; i++) - { - output_data[i] = (q7_t)CLAMP( - arm_nn_requantize(buffer_b[i], multiplier_out, shift_2) + zp_out, out_activation_max, out_activation_min); - } -#endif - - return (ARM_MATH_SUCCESS); -} - -/** - * @} end of SVDF group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/CMakeLists.txt b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/CMakeLists.txt deleted file mode 100644 index a74a994..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/CMakeLists.txt +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright (c) 2019-2021 Arm Limited. All rights reserved. -# -# SPDX-License-Identifier: Apache-2.0 -# -# Licensed under the Apache License, Version 2.0 (the License); you may -# not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an AS IS BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -project(CMSISNNSoftmax) - -file(GLOB SRC "./*_s8.c") -add_library(CMSISNNSoftmax STATIC ${SRC}) - -### Includes -target_include_directories(CMSISNNSoftmax PUBLIC "${NN}/Include") -target_include_directories(CMSISNNSoftmax PUBLIC "${ROOT}/CMSIS/Core/Include") -target_include_directories(CMSISNNSoftmax PUBLIC "${ROOT}/CMSIS/DSP/Include") - - - diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c deleted file mode 100644 index 84d1ac8..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_nn_softmax_common_s8.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (C) 2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_nn_softmax_common_s8.c - * Description: Softmax with s8 input and output of s8 or s16. - * - * $Date: 17 March 2022 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M processors - * -------------------------------------------------------------------- */ - -#include "arm_nnsupportfunctions.h" - -#define ACCUM_BITS 12 - -/** - * @ingroup groupSupport - */ - -/** - * @addtogroup Softmax - * @{ - */ - -/* - * Softmax function with s8 input and output of s8 or s16. - * - * Refer header file for details. - * - */ -void arm_nn_softmax_common_s8(const int8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - const bool int16_output, - void *output) -{ - const int32_t mask = (1 << shift); - - int32_t col = 0; - int32_t row_idx; - - for (row_idx = 0; row_idx < num_rows; ++row_idx) - { - // Find the maximum value in order to ensure numerical stability - int8_t max = *input; - - for (col = 1; col < row_size; ++col) - { - max = MAX(max, input[col]); - } - - int32_t diff = 0; - int32_t sum = 0; - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if (diff >= diff_min) - { - sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS); - } - } - - const int32_t headroom = __CLZ(sum); - const int32_t shifted_scale = ONE_OVER1((sum > 0 ? sum << headroom : 0) - (1 << 31)); - int32_t bits_over_unit; - - if (int16_output) - { - int16_t *output_s16 = (int16_t *)output + row_idx * row_size; - - bits_over_unit = ACCUM_BITS - headroom + 15; - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - - if (diff >= diff_min) - { - const int32_t res = - DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) + - NN_Q15_MIN; - output_s16[col] = (int16_t)CLAMP(res, (int32_t)NN_Q15_MAX, (int32_t)NN_Q15_MIN); - } - else - { - output_s16[col] = NN_Q15_MIN; - } - } - } - else - { - int8_t *output_s8 = (int8_t *)output + row_idx * row_size; - - bits_over_unit = ACCUM_BITS - headroom + 23; - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if (diff >= diff_min) - { - const int32_t res = - DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) + - NN_Q7_MIN; - output_s8[col] = (int8_t)CLAMP(res, (int32_t)NN_Q7_MAX, (int32_t)NN_Q7_MIN); - } - else - { - output_s8[col] = NN_Q7_MIN; - } - } - } - - input += row_size; - } -} - -/** - * @} end of NNBasicMath group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c deleted file mode 100644 index 18f3e83..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q15.c +++ /dev/null @@ -1,118 +0,0 @@ -/* - * Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_q15.c - * Description: Q15 softmax function - * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ - -/** - * @brief Q15 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * - * @details - * - * Here, instead of typical e based softmax, we use - * 2-based softmax, i.e.,: - * - * y_i = 2^(x_i) / sum(2^x_j) - * - * The relative output will be different here. - * But mathematically, the gradient will be the same - * with a log(2) scaling factor. - * - */ - -void arm_softmax_q15(const q15_t *vec_in, const uint16_t dim_vec, q15_t *p_out) -{ - q31_t sum; - int16_t i; - uint8_t shift; - q31_t base; - base = -1 * 0x100000; - for (i = 0; i < dim_vec; i++) - { - if (vec_in[i] > base) - { - base = vec_in[i]; - } - } - - /* we ignore really small values - * anyway, they will be 0 after shrinking - * to q15_t - */ - base = base - 16; - - sum = 0; - - for (i = 0; i < dim_vec; i++) - { - if (vec_in[i] > base) - { - shift = (uint8_t)__USAT(vec_in[i] - base, 5); - sum += 0x1 << shift; - } - } - - /* This is effectively (0x1 << 32) / sum */ - int64_t div_base = 0x100000000LL; - int output_base = (int32_t)(div_base / sum); - - /* Final confidence will be output_base >> ( 17 - (vec_in[i] - base) ) - * so 32768 (0x1<<15) -> 100% confidence when sum = 0x1 << 16, output_base = 0x1 << 16 - * and vec_in[i]-base = 16 - */ - for (i = 0; i < dim_vec; i++) - { - if (vec_in[i] > base) - { - /* Here minimum value of 17+base-vec[i] will be 1 */ - shift = (uint8_t)__USAT(17 + base - vec_in[i], 5); - p_out[i] = (q15_t)__SSAT((output_base >> shift), 16); - } - else - { - p_out[i] = 0; - } - } -} - -/** - * @} end of Softmax group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c deleted file mode 100644 index 58eb990..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_q7.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_q7.c - * Description: Q7 softmax function - * - * $Date: 09. October 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ - -/** - * @brief Q7 softmax function - * @param[in] vec_in pointer to input vector - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * - * @details - * - * Here, instead of typical natural logarithm e based softmax, we use - * 2-based softmax here, i.e.,: - * - * y_i = 2^(x_i) / sum(2^x_j) - * - * The relative output will be different here. - * But mathematically, the gradient will be the same - * with a log(2) scaling factor. - * - */ - -void arm_softmax_q7(const q7_t *vec_in, const uint16_t dim_vec, q7_t *p_out) -{ - q31_t sum; - int16_t i; - uint8_t shift; - q15_t base; - base = -128; - - /* We first search for the maximum */ - for (i = 0; i < dim_vec; i++) - { - if (vec_in[i] > base) - { - base = vec_in[i]; - } - } - - /* - * So the base is set to max-8, meaning - * that we ignore really small values. - * anyway, they will be 0 after shrinking to q7_t. - */ - base = base - (1 << 3); - - sum = 0; - - for (i = 0; i < dim_vec; i++) - { - shift = (uint8_t)__USAT(vec_in[i] - base, 3); - sum += 0x1 << shift; - } - - /* This is effectively (0x1 << 20) / sum */ - int output_base = (1 << 20) / sum; - - for (i = 0; i < dim_vec; i++) - { - - /* Here minimum value of 13+base-vec_in[i] will be 5 */ - shift = (uint8_t)__USAT(13 + base - vec_in[i], 5); - p_out[i] = (q7_t)__SSAT((output_base >> shift), 8); - } -} - -/** - * @} end of Softmax group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c deleted file mode 100644 index e840893..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s16.c +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (C) 2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_s16.c - * Description: S16 softmax function - * - * $Date: 9 March 2022 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @addtogroup Softmax - * @{ - */ - -arm_status arm_softmax_s16(const int16_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const cmsis_nn_softmax_lut_s16 *softmax_params, - int16_t *output) -{ - int32_t col = 0; - int32_t row_idx; - - if (softmax_params->exp_lut == NULL || softmax_params->one_by_one_lut == NULL) - { - return ARM_MATH_ARGUMENT_ERROR; - } - - for (row_idx = 0; row_idx < num_rows; ++row_idx) - { - // Find the maximum value in order to ensure numerical stability - int16_t max = *input; - for (col = 1; col < row_size; ++col) - { - max = MAX(max, input[col]); - } - - int32_t diff = 0; - int32_t sum = 0; - int16_t *cached_exp_results = output; - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - const int32_t scaled_diff = arm_nn_requantize(diff, mult, shift); - const int32_t symmetric_scaled_diff = scaled_diff + NN_Q15_MAX; - const int16_t saturated_symmetric_scaled_diff = MIN(MAX(symmetric_scaled_diff, NN_Q15_MIN), NN_Q15_MAX); - - // Lookup from exp table and cache result for next step - const int16_t index = (256 + (saturated_symmetric_scaled_diff >> 7)); - const int16_t offset = saturated_symmetric_scaled_diff & 0x7f; - const int16_t base = softmax_params->exp_lut[index]; - const int16_t slope = softmax_params->exp_lut[index + 1] - softmax_params->exp_lut[index]; - const int16_t delta = (slope * offset + 64) >> 7; - const int16_t result = (base + delta); - cached_exp_results[col] = result; - - sum += cached_exp_results[col]; - } - - const int32_t headroom = __CLZ(sum); - - // Compute the reciprocal 1/sum - const int32_t shifted_sum = (((sum) << (headroom - 1)) + (1 << 13)) >> 14; - - // Since LUT computes 1/(1 + x), compute x = (sum - 1) => -65536 - // Since LUT expects a symmetrical input, recenter from [UINT16_MIN, UINT16_MAX] to [INT16_MIN, INT16_MAX] => - // -32768 ==> So in total -65536 -32768 => -98304 - const int16_t symmetric_shifted_sum = shifted_sum - 98304; - - // Lookup from one by one table - const int16_t index = (256 + (symmetric_shifted_sum >> 7)); - const int16_t offset = symmetric_shifted_sum & 0x7f; - const int16_t base = softmax_params->one_by_one_lut[index]; - const int16_t slope = softmax_params->one_by_one_lut[index + 1] - softmax_params->one_by_one_lut[index]; - const int16_t delta = (slope * offset + 64) >> 7; - const int16_t one_by_one_result = (base + delta); - - for (col = 0; col < row_size; ++col) - { - const int16_t right_shift = 30 - headroom; - int32_t result = (cached_exp_results[col] * one_by_one_result) >> right_shift; - result = (result + 1) >> 1; // Last shift position and insert round - output[col] = (int16_t)result; - } - - output += row_size; - input += row_size; - } - - return ARM_MATH_SUCCESS; -} - -/** - * @} end of Softmax group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c deleted file mode 100644 index 09ac947..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8.c +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_s8.c - * Description: S8 softmax function - * - * $Date: 01. March 2021 - * $Revision: V.2.0.2 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -#define ACCUM_BITS 12 - -#ifdef ARM_MATH_MVEI -static int32x4_t arm_exp_on_negative_values_mve_32x4(int32x4_t val) -{ -#define SHIFT_START (24) - int32_t shift = SHIFT_START; - int32x4_t mask; - - const int32x4_t val_mod_minus_quarter = - vandq_s32(val, vdupq_n_s32((1 << SHIFT_START) - 1)) - vdupq_n_s32(1 << SHIFT_START); - const int32x4_t remainder = vsubq_s32(val_mod_minus_quarter, val); - const int32x4_t x = vaddq_n_s32(val_mod_minus_quarter << 5, 1 << 28); - const int32x4_t x2 = MUL_SAT_MVE(x, x); - const int32x4_t op_1 = DIV_POW2_MVE(MUL_SAT_MVE(x2, x2), 2) + MUL_SAT_MVE(x2, x); - const int32x4_t op_2 = x + DIV_POW2_MVE(MUL_SAT_MVE(op_1, vdupq_n_s32(715827883)) + x2, 1); - int32x4_t result = vdupq_n_s32(1895147668) + MUL_SAT_MVE(vdupq_n_s32(1895147668), op_2); - -#define SELECT_IF_NON_ZERO(x) \ - { \ - mve_pred16_t p = vcmpneq_n_s32(remainder & vdupq_n_s32(1 << shift++), 0); \ - mask = vmvnq_m_s32(vdupq_n_s32(0), vdupq_n_s32(0), p); \ - result = SELECT_USING_MASK(mask, MUL_SAT_MVE(result, vdupq_n_s32(x)), result); \ - } - - SELECT_IF_NON_ZERO(1672461947) - SELECT_IF_NON_ZERO(1302514674) - SELECT_IF_NON_ZERO(790015084) - SELECT_IF_NON_ZERO(290630308) - SELECT_IF_NON_ZERO(39332535) - SELECT_IF_NON_ZERO(720401) - SELECT_IF_NON_ZERO(242) - -#undef SELECT_IF_NON_ZERO - - mve_pred16_t p = vcmpeqq_n_s32(val, 0); - mask = vmvnq_m_s32(vdupq_n_s32(0), vdupq_n_s32(0), p); - - result = SELECT_USING_MASK(mask, vdupq_n_s32(Q31_MAX), result); - return result; -} -#endif - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ - -void arm_softmax_s8(const int8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int8_t *output) -{ -#ifdef ARM_MATH_MVEI - -#define ACT_MIN ((int8_t)Q7_MIN) -#define ACT_MAX ((int8_t)Q7_MAX) - - const int32_t mask = (1 << shift); - - for (int i_num_rows = 0; i_num_rows < num_rows; ++i_num_rows) - { - int8_t max = ACT_MIN; - - int32_t vec_count = (row_size + 15) / 16; - uint32_t r_count = (uint32_t)row_size; - for (int i = 0; i < vec_count; i++) - { - mve_pred16_t p = vctp8q(r_count); - const int8x16_t ip = vldrbq_z_s8(&input[i * 16], p); - max = vmaxvq_p_s8(max, ip, p); - r_count -= 16; - } - - vec_count = row_size / 4; - int32_t idx = 0; - int32_t sum = 0; - - while (vec_count) - { - int32x4_t ip = vldrbq_s32(&input[idx * 4]); - ip = vsubq_n_s32(ip, max); - mve_pred16_t p = vcmpgeq_n_s32(ip, diff_min); - if (p != 0) - { - ip = vmulq_n_s32(ip, mask); - - int32x4_t res = MUL_SAT_MVE(ip, vdupq_n_s32(mult)); - - res = arm_exp_on_negative_values_mve_32x4(res); - res = DIV_POW2_MVE(res, ACCUM_BITS); - res = vpselq_s32(res, vdupq_n_s32(0), p); - sum += vaddvq_s32(res); - } - - vec_count--; - idx++; - } - - const int32_t tail_idx = row_size & ~3; - for (int i = 0; i < (row_size & 3); i++) - { - const int32_t diff = input[tail_idx + i] - max; - if (diff >= diff_min) - { - sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS); - } - } - - const int32_t headroom = __CLZ((uint32_t)sum); - const int32_t bits_over_unit = ACCUM_BITS - headroom + 23; - const int32_t shifted_scale = ONE_OVER1((sum > 0 ? sum << headroom : 0) - (1 << 31)); - - vec_count = row_size / 4; - idx = 0; - - while (vec_count) - { - int32x4_t ip = vldrbq_s32(&input[idx]); - ip = vsubq_n_s32(ip, max); - - mve_pred16_t p = vcmpgeq_n_s32(ip, diff_min); - - int32x4_t tmp_res; - - if (p != 0) - { - ip = vmulq_n_s32(ip, mask); - - tmp_res = MUL_SAT_MVE(ip, vdupq_n_s32(mult)); - tmp_res = arm_exp_on_negative_values_mve_32x4(tmp_res); - tmp_res = MUL_SAT_MVE(vdupq_n_s32(shifted_scale), tmp_res); - tmp_res = DIV_POW2_MVE(tmp_res, bits_over_unit); - tmp_res += vdupq_n_s32(ACT_MIN); - - tmp_res = vmaxq_s32(tmp_res, vdupq_n_s32(ACT_MIN)); - tmp_res = vminq_s32(tmp_res, vdupq_n_s32(ACT_MAX)); - tmp_res = vpselq_s32(tmp_res, vdupq_n_s32(ACT_MIN), p); - } - else - { - tmp_res = vdupq_n_s32(ACT_MIN); - } - vstrbq_s32(&output[idx], tmp_res); - vec_count--; - idx += 4; - } - - for (int i = 0; i < (row_size & 3); i++) - { - int32_t diff = input[tail_idx + i] - max; - if (diff >= diff_min) - { - const int32_t res = - DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) - 128; - output[tail_idx + i] = (int8_t)CLAMP(res, (int32_t)ACT_MAX, (int32_t)ACT_MIN); - } - else - { - output[tail_idx + i] = ACT_MIN; - } - } - - input += row_size; - output += row_size; - } -#else - const int32_t mask = (1 << shift); - - int32_t col = 0; - int32_t row_idx; - - for (row_idx = 0; row_idx < num_rows; ++row_idx) - { - // Find the maximum value in order to ensure numerical stability - int8_t max = *input; - - for (col = 1; col < row_size; ++col) - { - max = MAX(max, input[col]); - } - - int32_t diff = 0; - int32_t sum = 0; - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if (diff >= diff_min) - { - sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS); - } - } - - const int32_t headroom = __CLZ(sum); - const int32_t bits_over_unit = ACCUM_BITS - headroom + 23; - const int32_t shifted_scale = ONE_OVER1((sum > 0 ? sum << headroom : 0) - (1 << 31)); - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if (diff >= diff_min) - { - const int32_t res = - DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit) - 128; - output[col] = (int8_t)CLAMP(res, (int32_t)127, (int32_t)-128); - } - else - { - output[col] = -128; - } - } - input += row_size; - output += row_size; - } - -#endif -} -/** - * @} end of Softmax group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c deleted file mode 100644 index 9ba0b9a..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_s8_s16.c +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (C) 2022 Arm Limited or its affiliates. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_s8_s16.c - * Description: S8 to s16 softmax function - * - * $Date: 7 January 2022 - * $Revision: V.1.0.0 - * - * Target Processor: Cortex-M cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ - -void arm_softmax_s8_s16(const int8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - int16_t *output) -{ - arm_nn_softmax_common_s8(input, num_rows, row_size, mult, shift, diff_min, true, (void *)output); -} -/** - * @} end of Softmax group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c deleted file mode 100644 index c4df8f8..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_u8.c +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_u8.c - * Description: U8 softmax function - * - * $Date: 09. October 2020 - * $Revision: V.1.0.2 - * - * Target Processor: Cortex-M CPUs - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" -#include "arm_nnsupportfunctions.h" - -#define ACCUM_BITS 12 - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ -void arm_softmax_u8(const uint8_t *input, - const int32_t num_rows, - const int32_t row_size, - const int32_t mult, - const int32_t shift, - const int32_t diff_min, - uint8_t *output) -{ - const int32_t mask = (1 << shift); - - int32_t col = 0; - int32_t row_idx; - - for (row_idx = 0; row_idx < num_rows; ++row_idx) - { - // Find the maximum value in order to ensure numerical stability - uint8_t max = *input; - - for (col = 1; col < row_size; ++col) - { - max = MAX(max, input[col]); - } - - int32_t diff = 0; - int32_t sum = 0; - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if (diff >= diff_min) - { - sum += DIV_POW2(EXP_ON_NEG(MUL_SAT(diff * mask, mult)), ACCUM_BITS); - } - } - - const int32_t headroom = __CLZ((uint32_t)sum); - const int32_t bits_over_unit = ACCUM_BITS - headroom + 23; - const int32_t shifted_scale = ONE_OVER1((sum << headroom) - (1 << 31)); - - for (col = 0; col < row_size; ++col) - { - diff = input[col] - max; - if (diff >= diff_min) - { - const int32_t res = - DIV_POW2(MUL_SAT(shifted_scale, EXP_ON_NEG(MUL_SAT(diff * mask, mult))), bits_over_unit); - output[col] = (uint8_t)CLAMP(res, (int32_t)255, (int32_t)0); - } - else - { - output[col] = 0; - } - } - input += row_size; - output += row_size; - } -} -/** - * @} end of Softmax group - */ \ No newline at end of file diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c deleted file mode 100644 index 66e892e..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/NN/Source/SoftmaxFunctions/arm_softmax_with_batch_q7.c +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* ---------------------------------------------------------------------- - * Project: CMSIS NN Library - * Title: arm_softmax_with_batch_q7.c - * Description: Q7 softmax function - * - * $Date: 09. October 2020 - * $Revision: V.1.0.1 - * - * Target Processor: Cortex-M and Cortex-A cores - * - * -------------------------------------------------------------------- */ - -#include "arm_nnfunctions.h" - -/** - * @ingroup groupNN - */ - -/** - * @addtogroup Softmax - * @{ - */ - -/** - * @brief Q7 softmax function with batch parameter - * @param[in] vec_in pointer to input vector - * @param[in] nb_batches number of batches - * @param[in] dim_vec input vector dimention - * @param[out] p_out pointer to output vector - * - * @details - * - * Here, instead of typical natural logarithm e based softmax, we use - * 2-based softmax here, i.e.,: - * - * y_i = 2^(x_i) / sum(2^x_j) - * - * The relative output will be different here. - * But mathematically, the gradient will be the same - * with a log(2) scaling factor. - * - */ - -void arm_softmax_with_batch_q7(const q7_t *vec_in, const uint16_t nb_batches, const uint16_t dim_vec, q7_t *p_out) -{ - for (int i = 0; i < nb_batches; i++) - { - arm_softmax_q7(vec_in, dim_vec, p_out); - vec_in += dim_vec; - p_out += dim_vec; - } -} - -/** - * @} end of Softmax group - */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS/Template/cmsis_os.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS/Template/cmsis_os.h deleted file mode 100644 index 30068d3..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS/Template/cmsis_os.h +++ /dev/null @@ -1,698 +0,0 @@ -/* ---------------------------------------------------------------------- - * $Date: 5. February 2013 - * $Revision: V1.02 - * - * Project: CMSIS-RTOS API - * Title: cmsis_os.h template header file - * - * Version 0.02 - * Initial Proposal Phase - * Version 0.03 - * osKernelStart added, optional feature: main started as thread - * osSemaphores have standard behavior - * osTimerCreate does not start the timer, added osTimerStart - * osThreadPass is renamed to osThreadYield - * Version 1.01 - * Support for C++ interface - * - const attribute removed from the osXxxxDef_t typedef's - * - const attribute added to the osXxxxDef macros - * Added: osTimerDelete, osMutexDelete, osSemaphoreDelete - * Added: osKernelInitialize - * Version 1.02 - * Control functions for short timeouts in microsecond resolution: - * Added: osKernelSysTick, osKernelSysTickFrequency, osKernelSysTickMicroSec - * Removed: osSignalGet - *---------------------------------------------------------------------------- - * - * Copyright (c) 2013-2017 ARM LIMITED - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - *---------------------------------------------------------------------------*/ - - -#ifndef _CMSIS_OS_H -#define _CMSIS_OS_H - -/// \note MUST REMAIN UNCHANGED: \b osCMSIS identifies the CMSIS-RTOS API version. -#define osCMSIS 0x10002 ///< API version (main [31:16] .sub [15:0]) - -/// \note CAN BE CHANGED: \b osCMSIS_KERNEL identifies the underlying RTOS kernel and version number. -#define osCMSIS_KERNEL 0x10000 ///< RTOS identification and version (main [31:16] .sub [15:0]) - -/// \note MUST REMAIN UNCHANGED: \b osKernelSystemId shall be consistent in every CMSIS-RTOS. -#define osKernelSystemId "KERNEL V1.00" ///< RTOS identification string - -/// \note MUST REMAIN UNCHANGED: \b osFeature_xxx shall be consistent in every CMSIS-RTOS. -#define osFeature_MainThread 1 ///< main thread 1=main can be thread, 0=not available -#define osFeature_Pool 1 ///< Memory Pools: 1=available, 0=not available -#define osFeature_MailQ 1 ///< Mail Queues: 1=available, 0=not available -#define osFeature_MessageQ 1 ///< Message Queues: 1=available, 0=not available -#define osFeature_Signals 8 ///< maximum number of Signal Flags available per thread -#define osFeature_Semaphore 30 ///< maximum count for \ref osSemaphoreCreate function -#define osFeature_Wait 1 ///< osWait function: 1=available, 0=not available -#define osFeature_SysTick 1 ///< osKernelSysTick functions: 1=available, 0=not available - -#include -#include - -#ifdef __cplusplus -extern "C" -{ -#endif - - -// ==== Enumeration, structures, defines ==== - -/// Priority used for thread control. -/// \note MUST REMAIN UNCHANGED: \b osPriority shall be consistent in every CMSIS-RTOS. -typedef enum { - osPriorityIdle = -3, ///< priority: idle (lowest) - osPriorityLow = -2, ///< priority: low - osPriorityBelowNormal = -1, ///< priority: below normal - osPriorityNormal = 0, ///< priority: normal (default) - osPriorityAboveNormal = +1, ///< priority: above normal - osPriorityHigh = +2, ///< priority: high - osPriorityRealtime = +3, ///< priority: realtime (highest) - osPriorityError = 0x84 ///< system cannot determine priority or thread has illegal priority -} osPriority; - -/// Timeout value. -/// \note MUST REMAIN UNCHANGED: \b osWaitForever shall be consistent in every CMSIS-RTOS. -#define osWaitForever 0xFFFFFFFF ///< wait forever timeout value - -/// Status code values returned by CMSIS-RTOS functions. -/// \note MUST REMAIN UNCHANGED: \b osStatus shall be consistent in every CMSIS-RTOS. -typedef enum { - osOK = 0, ///< function completed; no error or event occurred. - osEventSignal = 0x08, ///< function completed; signal event occurred. - osEventMessage = 0x10, ///< function completed; message event occurred. - osEventMail = 0x20, ///< function completed; mail event occurred. - osEventTimeout = 0x40, ///< function completed; timeout occurred. - osErrorParameter = 0x80, ///< parameter error: a mandatory parameter was missing or specified an incorrect object. - osErrorResource = 0x81, ///< resource not available: a specified resource was not available. - osErrorTimeoutResource = 0xC1, ///< resource not available within given time: a specified resource was not available within the timeout period. - osErrorISR = 0x82, ///< not allowed in ISR context: the function cannot be called from interrupt service routines. - osErrorISRRecursive = 0x83, ///< function called multiple times from ISR with same object. - osErrorPriority = 0x84, ///< system cannot determine priority or thread has illegal priority. - osErrorNoMemory = 0x85, ///< system is out of memory: it was impossible to allocate or reserve memory for the operation. - osErrorValue = 0x86, ///< value of a parameter is out of range. - osErrorOS = 0xFF, ///< unspecified RTOS error: run-time error but no other error message fits. - os_status_reserved = 0x7FFFFFFF ///< prevent from enum down-size compiler optimization. -} osStatus; - - -/// Timer type value for the timer definition. -/// \note MUST REMAIN UNCHANGED: \b os_timer_type shall be consistent in every CMSIS-RTOS. -typedef enum { - osTimerOnce = 0, ///< one-shot timer - osTimerPeriodic = 1 ///< repeating timer -} os_timer_type; - -/// Entry point of a thread. -/// \note MUST REMAIN UNCHANGED: \b os_pthread shall be consistent in every CMSIS-RTOS. -typedef void (*os_pthread) (void const *argument); - -/// Entry point of a timer call back function. -/// \note MUST REMAIN UNCHANGED: \b os_ptimer shall be consistent in every CMSIS-RTOS. -typedef void (*os_ptimer) (void const *argument); - -// >>> the following data type definitions may shall adapted towards a specific RTOS - -/// Thread ID identifies the thread (pointer to a thread control block). -/// \note CAN BE CHANGED: \b os_thread_cb is implementation specific in every CMSIS-RTOS. -typedef struct os_thread_cb *osThreadId; - -/// Timer ID identifies the timer (pointer to a timer control block). -/// \note CAN BE CHANGED: \b os_timer_cb is implementation specific in every CMSIS-RTOS. -typedef struct os_timer_cb *osTimerId; - -/// Mutex ID identifies the mutex (pointer to a mutex control block). -/// \note CAN BE CHANGED: \b os_mutex_cb is implementation specific in every CMSIS-RTOS. -typedef struct os_mutex_cb *osMutexId; - -/// Semaphore ID identifies the semaphore (pointer to a semaphore control block). -/// \note CAN BE CHANGED: \b os_semaphore_cb is implementation specific in every CMSIS-RTOS. -typedef struct os_semaphore_cb *osSemaphoreId; - -/// Pool ID identifies the memory pool (pointer to a memory pool control block). -/// \note CAN BE CHANGED: \b os_pool_cb is implementation specific in every CMSIS-RTOS. -typedef struct os_pool_cb *osPoolId; - -/// Message ID identifies the message queue (pointer to a message queue control block). -/// \note CAN BE CHANGED: \b os_messageQ_cb is implementation specific in every CMSIS-RTOS. -typedef struct os_messageQ_cb *osMessageQId; - -/// Mail ID identifies the mail queue (pointer to a mail queue control block). -/// \note CAN BE CHANGED: \b os_mailQ_cb is implementation specific in every CMSIS-RTOS. -typedef struct os_mailQ_cb *osMailQId; - - -/// Thread Definition structure contains startup information of a thread. -/// \note CAN BE CHANGED: \b os_thread_def is implementation specific in every CMSIS-RTOS. -typedef struct os_thread_def { - os_pthread pthread; ///< start address of thread function - osPriority tpriority; ///< initial thread priority - uint32_t instances; ///< maximum number of instances of that thread function - uint32_t stacksize; ///< stack size requirements in bytes; 0 is default stack size -} osThreadDef_t; - -/// Timer Definition structure contains timer parameters. -/// \note CAN BE CHANGED: \b os_timer_def is implementation specific in every CMSIS-RTOS. -typedef struct os_timer_def { - os_ptimer ptimer; ///< start address of a timer function -} osTimerDef_t; - -/// Mutex Definition structure contains setup information for a mutex. -/// \note CAN BE CHANGED: \b os_mutex_def is implementation specific in every CMSIS-RTOS. -typedef struct os_mutex_def { - uint32_t dummy; ///< dummy value. -} osMutexDef_t; - -/// Semaphore Definition structure contains setup information for a semaphore. -/// \note CAN BE CHANGED: \b os_semaphore_def is implementation specific in every CMSIS-RTOS. -typedef struct os_semaphore_def { - uint32_t dummy; ///< dummy value. -} osSemaphoreDef_t; - -/// Definition structure for memory block allocation. -/// \note CAN BE CHANGED: \b os_pool_def is implementation specific in every CMSIS-RTOS. -typedef struct os_pool_def { - uint32_t pool_sz; ///< number of items (elements) in the pool - uint32_t item_sz; ///< size of an item - void *pool; ///< pointer to memory for pool -} osPoolDef_t; - -/// Definition structure for message queue. -/// \note CAN BE CHANGED: \b os_messageQ_def is implementation specific in every CMSIS-RTOS. -typedef struct os_messageQ_def { - uint32_t queue_sz; ///< number of elements in the queue - uint32_t item_sz; ///< size of an item - void *pool; ///< memory array for messages -} osMessageQDef_t; - -/// Definition structure for mail queue. -/// \note CAN BE CHANGED: \b os_mailQ_def is implementation specific in every CMSIS-RTOS. -typedef struct os_mailQ_def { - uint32_t queue_sz; ///< number of elements in the queue - uint32_t item_sz; ///< size of an item - void *pool; ///< memory array for mail -} osMailQDef_t; - -/// Event structure contains detailed information about an event. -/// \note MUST REMAIN UNCHANGED: \b os_event shall be consistent in every CMSIS-RTOS. -/// However the struct may be extended at the end. -typedef struct { - osStatus status; ///< status code: event or error information - union { - uint32_t v; ///< message as 32-bit value - void *p; ///< message or mail as void pointer - int32_t signals; ///< signal flags - } value; ///< event value - union { - osMailQId mail_id; ///< mail id obtained by \ref osMailCreate - osMessageQId message_id; ///< message id obtained by \ref osMessageCreate - } def; ///< event definition -} osEvent; - - -// ==== Kernel Control Functions ==== - -/// Initialize the RTOS Kernel for creating objects. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osKernelInitialize shall be consistent in every CMSIS-RTOS. -osStatus osKernelInitialize (void); - -/// Start the RTOS Kernel. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osKernelStart shall be consistent in every CMSIS-RTOS. -osStatus osKernelStart (void); - -/// Check if the RTOS kernel is already started. -/// \note MUST REMAIN UNCHANGED: \b osKernelRunning shall be consistent in every CMSIS-RTOS. -/// \return 0 RTOS is not started, 1 RTOS is started. -int32_t osKernelRunning(void); - -#if (defined (osFeature_SysTick) && (osFeature_SysTick != 0)) // System Timer available - -/// Get the RTOS kernel system timer counter -/// \note MUST REMAIN UNCHANGED: \b osKernelSysTick shall be consistent in every CMSIS-RTOS. -/// \return RTOS kernel system timer as 32-bit value -uint32_t osKernelSysTick (void); - -/// The RTOS kernel system timer frequency in Hz -/// \note Reflects the system timer setting and is typically defined in a configuration file. -#define osKernelSysTickFrequency 100000000 - -/// Convert a microseconds value to a RTOS kernel system timer value. -/// \param microsec time value in microseconds. -/// \return time value normalized to the \ref osKernelSysTickFrequency -#define osKernelSysTickMicroSec(microsec) (((uint64_t)microsec * (osKernelSysTickFrequency)) / 1000000) - -#endif // System Timer available - -// ==== Thread Management ==== - -/// Create a Thread Definition with function, priority, and stack requirements. -/// \param name name of the thread function. -/// \param priority initial priority of the thread function. -/// \param instances number of possible thread instances. -/// \param stacksz stack size (in bytes) requirements for the thread function. -/// \note CAN BE CHANGED: The parameters to \b osThreadDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osThreadDef(name, priority, instances, stacksz) \ -extern const osThreadDef_t os_thread_def_##name -#else // define the object -#define osThreadDef(name, priority, instances, stacksz) \ -const osThreadDef_t os_thread_def_##name = \ -{ (name), (priority), (instances), (stacksz) } -#endif - -/// Access a Thread definition. -/// \param name name of the thread definition object. -/// \note CAN BE CHANGED: The parameter to \b osThread shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osThread(name) \ -&os_thread_def_##name - -/// Create a thread and add it to Active Threads and set it to state READY. -/// \param[in] thread_def thread definition referenced with \ref osThread. -/// \param[in] argument pointer that is passed to the thread function as start argument. -/// \return thread ID for reference by other functions or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osThreadCreate shall be consistent in every CMSIS-RTOS. -osThreadId osThreadCreate (const osThreadDef_t *thread_def, void *argument); - -/// Return the thread ID of the current running thread. -/// \return thread ID for reference by other functions or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osThreadGetId shall be consistent in every CMSIS-RTOS. -osThreadId osThreadGetId (void); - -/// Terminate execution of a thread and remove it from Active Threads. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osThreadTerminate shall be consistent in every CMSIS-RTOS. -osStatus osThreadTerminate (osThreadId thread_id); - -/// Pass control to next thread that is in state \b READY. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osThreadYield shall be consistent in every CMSIS-RTOS. -osStatus osThreadYield (void); - -/// Change priority of an active thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \param[in] priority new priority value for the thread function. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osThreadSetPriority shall be consistent in every CMSIS-RTOS. -osStatus osThreadSetPriority (osThreadId thread_id, osPriority priority); - -/// Get current priority of an active thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \return current priority value of the thread function. -/// \note MUST REMAIN UNCHANGED: \b osThreadGetPriority shall be consistent in every CMSIS-RTOS. -osPriority osThreadGetPriority (osThreadId thread_id); - - -// ==== Generic Wait Functions ==== - -/// Wait for Timeout (Time Delay). -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue "time delay" value -/// \return status code that indicates the execution status of the function. -osStatus osDelay (uint32_t millisec); - -#if (defined (osFeature_Wait) && (osFeature_Wait != 0)) // Generic Wait available - -/// Wait for Signal, Message, Mail, or Timeout. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out -/// \return event that contains signal, message, or mail information or error code. -/// \note MUST REMAIN UNCHANGED: \b osWait shall be consistent in every CMSIS-RTOS. -osEvent osWait (uint32_t millisec); - -#endif // Generic Wait available - - -// ==== Timer Management Functions ==== -/// Define a Timer object. -/// \param name name of the timer object. -/// \param function name of the timer call back function. -/// \note CAN BE CHANGED: The parameter to \b osTimerDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osTimerDef(name, function) \ -extern const osTimerDef_t os_timer_def_##name -#else // define the object -#define osTimerDef(name, function) \ -const osTimerDef_t os_timer_def_##name = \ -{ (function) } -#endif - -/// Access a Timer definition. -/// \param name name of the timer object. -/// \note CAN BE CHANGED: The parameter to \b osTimer shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osTimer(name) \ -&os_timer_def_##name - -/// Create a timer. -/// \param[in] timer_def timer object referenced with \ref osTimer. -/// \param[in] type osTimerOnce for one-shot or osTimerPeriodic for periodic behavior. -/// \param[in] argument argument to the timer call back function. -/// \return timer ID for reference by other functions or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osTimerCreate shall be consistent in every CMSIS-RTOS. -osTimerId osTimerCreate (const osTimerDef_t *timer_def, os_timer_type type, void *argument); - -/// Start or restart a timer. -/// \param[in] timer_id timer ID obtained by \ref osTimerCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue "time delay" value of the timer. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osTimerStart shall be consistent in every CMSIS-RTOS. -osStatus osTimerStart (osTimerId timer_id, uint32_t millisec); - -/// Stop the timer. -/// \param[in] timer_id timer ID obtained by \ref osTimerCreate. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osTimerStop shall be consistent in every CMSIS-RTOS. -osStatus osTimerStop (osTimerId timer_id); - -/// Delete a timer that was created by \ref osTimerCreate. -/// \param[in] timer_id timer ID obtained by \ref osTimerCreate. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osTimerDelete shall be consistent in every CMSIS-RTOS. -osStatus osTimerDelete (osTimerId timer_id); - - -// ==== Signal Management ==== - -/// Set the specified Signal Flags of an active thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \param[in] signals specifies the signal flags of the thread that should be set. -/// \return previous signal flags of the specified thread or 0x80000000 in case of incorrect parameters. -/// \note MUST REMAIN UNCHANGED: \b osSignalSet shall be consistent in every CMSIS-RTOS. -int32_t osSignalSet (osThreadId thread_id, int32_t signals); - -/// Clear the specified Signal Flags of an active thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \param[in] signals specifies the signal flags of the thread that shall be cleared. -/// \return previous signal flags of the specified thread or 0x80000000 in case of incorrect parameters or call from ISR. -/// \note MUST REMAIN UNCHANGED: \b osSignalClear shall be consistent in every CMSIS-RTOS. -int32_t osSignalClear (osThreadId thread_id, int32_t signals); - -/// Wait for one or more Signal Flags to become signaled for the current \b RUNNING thread. -/// \param[in] signals wait until all specified signal flags set or 0 for any single signal flag. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return event flag information or error code. -/// \note MUST REMAIN UNCHANGED: \b osSignalWait shall be consistent in every CMSIS-RTOS. -osEvent osSignalWait (int32_t signals, uint32_t millisec); - - -// ==== Mutex Management ==== - -/// Define a Mutex. -/// \param name name of the mutex object. -/// \note CAN BE CHANGED: The parameter to \b osMutexDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osMutexDef(name) \ -extern const osMutexDef_t os_mutex_def_##name -#else // define the object -#define osMutexDef(name) \ -const osMutexDef_t os_mutex_def_##name = { 0 } -#endif - -/// Access a Mutex definition. -/// \param name name of the mutex object. -/// \note CAN BE CHANGED: The parameter to \b osMutex shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osMutex(name) \ -&os_mutex_def_##name - -/// Create and Initialize a Mutex object. -/// \param[in] mutex_def mutex definition referenced with \ref osMutex. -/// \return mutex ID for reference by other functions or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osMutexCreate shall be consistent in every CMSIS-RTOS. -osMutexId osMutexCreate (const osMutexDef_t *mutex_def); - -/// Wait until a Mutex becomes available. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osMutexWait shall be consistent in every CMSIS-RTOS. -osStatus osMutexWait (osMutexId mutex_id, uint32_t millisec); - -/// Release a Mutex that was obtained by \ref osMutexWait. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexCreate. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osMutexRelease shall be consistent in every CMSIS-RTOS. -osStatus osMutexRelease (osMutexId mutex_id); - -/// Delete a Mutex that was created by \ref osMutexCreate. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexCreate. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osMutexDelete shall be consistent in every CMSIS-RTOS. -osStatus osMutexDelete (osMutexId mutex_id); - - -// ==== Semaphore Management Functions ==== - -#if (defined (osFeature_Semaphore) && (osFeature_Semaphore != 0)) // Semaphore available - -/// Define a Semaphore object. -/// \param name name of the semaphore object. -/// \note CAN BE CHANGED: The parameter to \b osSemaphoreDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osSemaphoreDef(name) \ -extern const osSemaphoreDef_t os_semaphore_def_##name -#else // define the object -#define osSemaphoreDef(name) \ -const osSemaphoreDef_t os_semaphore_def_##name = { 0 } -#endif - -/// Access a Semaphore definition. -/// \param name name of the semaphore object. -/// \note CAN BE CHANGED: The parameter to \b osSemaphore shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osSemaphore(name) \ -&os_semaphore_def_##name - -/// Create and Initialize a Semaphore object used for managing resources. -/// \param[in] semaphore_def semaphore definition referenced with \ref osSemaphore. -/// \param[in] count number of available resources. -/// \return semaphore ID for reference by other functions or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osSemaphoreCreate shall be consistent in every CMSIS-RTOS. -osSemaphoreId osSemaphoreCreate (const osSemaphoreDef_t *semaphore_def, int32_t count); - -/// Wait until a Semaphore token becomes available. -/// \param[in] semaphore_id semaphore object referenced with \ref osSemaphoreCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return number of available tokens, or -1 in case of incorrect parameters. -/// \note MUST REMAIN UNCHANGED: \b osSemaphoreWait shall be consistent in every CMSIS-RTOS. -int32_t osSemaphoreWait (osSemaphoreId semaphore_id, uint32_t millisec); - -/// Release a Semaphore token. -/// \param[in] semaphore_id semaphore object referenced with \ref osSemaphoreCreate. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osSemaphoreRelease shall be consistent in every CMSIS-RTOS. -osStatus osSemaphoreRelease (osSemaphoreId semaphore_id); - -/// Delete a Semaphore that was created by \ref osSemaphoreCreate. -/// \param[in] semaphore_id semaphore object referenced with \ref osSemaphoreCreate. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osSemaphoreDelete shall be consistent in every CMSIS-RTOS. -osStatus osSemaphoreDelete (osSemaphoreId semaphore_id); - -#endif // Semaphore available - - -// ==== Memory Pool Management Functions ==== - -#if (defined (osFeature_Pool) && (osFeature_Pool != 0)) // Memory Pool Management available - -/// \brief Define a Memory Pool. -/// \param name name of the memory pool. -/// \param no maximum number of blocks (objects) in the memory pool. -/// \param type data type of a single block (object). -/// \note CAN BE CHANGED: The parameter to \b osPoolDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osPoolDef(name, no, type) \ -extern const osPoolDef_t os_pool_def_##name -#else // define the object -#define osPoolDef(name, no, type) \ -const osPoolDef_t os_pool_def_##name = \ -{ (no), sizeof(type), NULL } -#endif - -/// \brief Access a Memory Pool definition. -/// \param name name of the memory pool -/// \note CAN BE CHANGED: The parameter to \b osPool shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osPool(name) \ -&os_pool_def_##name - -/// Create and Initialize a memory pool. -/// \param[in] pool_def memory pool definition referenced with \ref osPool. -/// \return memory pool ID for reference by other functions or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osPoolCreate shall be consistent in every CMSIS-RTOS. -osPoolId osPoolCreate (const osPoolDef_t *pool_def); - -/// Allocate a memory block from a memory pool. -/// \param[in] pool_id memory pool ID obtain referenced with \ref osPoolCreate. -/// \return address of the allocated memory block or NULL in case of no memory available. -/// \note MUST REMAIN UNCHANGED: \b osPoolAlloc shall be consistent in every CMSIS-RTOS. -void *osPoolAlloc (osPoolId pool_id); - -/// Allocate a memory block from a memory pool and set memory block to zero. -/// \param[in] pool_id memory pool ID obtain referenced with \ref osPoolCreate. -/// \return address of the allocated memory block or NULL in case of no memory available. -/// \note MUST REMAIN UNCHANGED: \b osPoolCAlloc shall be consistent in every CMSIS-RTOS. -void *osPoolCAlloc (osPoolId pool_id); - -/// Return an allocated memory block back to a specific memory pool. -/// \param[in] pool_id memory pool ID obtain referenced with \ref osPoolCreate. -/// \param[in] block address of the allocated memory block that is returned to the memory pool. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osPoolFree shall be consistent in every CMSIS-RTOS. -osStatus osPoolFree (osPoolId pool_id, void *block); - -#endif // Memory Pool Management available - - -// ==== Message Queue Management Functions ==== - -#if (defined (osFeature_MessageQ) && (osFeature_MessageQ != 0)) // Message Queues available - -/// \brief Create a Message Queue Definition. -/// \param name name of the queue. -/// \param queue_sz maximum number of messages in the queue. -/// \param type data type of a single message element (for debugger). -/// \note CAN BE CHANGED: The parameter to \b osMessageQDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osMessageQDef(name, queue_sz, type) \ -extern const osMessageQDef_t os_messageQ_def_##name -#else // define the object -#define osMessageQDef(name, queue_sz, type) \ -const osMessageQDef_t os_messageQ_def_##name = \ -{ (queue_sz), sizeof (type) } -#endif - -/// \brief Access a Message Queue Definition. -/// \param name name of the queue -/// \note CAN BE CHANGED: The parameter to \b osMessageQ shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osMessageQ(name) \ -&os_messageQ_def_##name - -/// Create and Initialize a Message Queue. -/// \param[in] queue_def queue definition referenced with \ref osMessageQ. -/// \param[in] thread_id thread ID (obtained by \ref osThreadCreate or \ref osThreadGetId) or NULL. -/// \return message queue ID for reference by other functions or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osMessageCreate shall be consistent in every CMSIS-RTOS. -osMessageQId osMessageCreate (const osMessageQDef_t *queue_def, osThreadId thread_id); - -/// Put a Message to a Queue. -/// \param[in] queue_id message queue ID obtained with \ref osMessageCreate. -/// \param[in] info message information. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osMessagePut shall be consistent in every CMSIS-RTOS. -osStatus osMessagePut (osMessageQId queue_id, uint32_t info, uint32_t millisec); - -/// Get a Message or Wait for a Message from a Queue. -/// \param[in] queue_id message queue ID obtained with \ref osMessageCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return event information that includes status code. -/// \note MUST REMAIN UNCHANGED: \b osMessageGet shall be consistent in every CMSIS-RTOS. -osEvent osMessageGet (osMessageQId queue_id, uint32_t millisec); - -#endif // Message Queues available - - -// ==== Mail Queue Management Functions ==== - -#if (defined (osFeature_MailQ) && (osFeature_MailQ != 0)) // Mail Queues available - -/// \brief Create a Mail Queue Definition. -/// \param name name of the queue -/// \param queue_sz maximum number of messages in queue -/// \param type data type of a single message element -/// \note CAN BE CHANGED: The parameter to \b osMailQDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osMailQDef(name, queue_sz, type) \ -extern const osMailQDef_t os_mailQ_def_##name -#else // define the object -#define osMailQDef(name, queue_sz, type) \ -const osMailQDef_t os_mailQ_def_##name = \ -{ (queue_sz), sizeof (type) } -#endif - -/// \brief Access a Mail Queue Definition. -/// \param name name of the queue -/// \note CAN BE CHANGED: The parameter to \b osMailQ shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osMailQ(name) \ -&os_mailQ_def_##name - -/// Create and Initialize mail queue. -/// \param[in] queue_def reference to the mail queue definition obtain with \ref osMailQ -/// \param[in] thread_id thread ID (obtained by \ref osThreadCreate or \ref osThreadGetId) or NULL. -/// \return mail queue ID for reference by other functions or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osMailCreate shall be consistent in every CMSIS-RTOS. -osMailQId osMailCreate (const osMailQDef_t *queue_def, osThreadId thread_id); - -/// Allocate a memory block from a mail. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out -/// \return pointer to memory block that can be filled with mail or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osMailAlloc shall be consistent in every CMSIS-RTOS. -void *osMailAlloc (osMailQId queue_id, uint32_t millisec); - -/// Allocate a memory block from a mail and set memory block to zero. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out -/// \return pointer to memory block that can be filled with mail or NULL in case of error. -/// \note MUST REMAIN UNCHANGED: \b osMailCAlloc shall be consistent in every CMSIS-RTOS. -void *osMailCAlloc (osMailQId queue_id, uint32_t millisec); - -/// Put a mail to a queue. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] mail memory block previously allocated with \ref osMailAlloc or \ref osMailCAlloc. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osMailPut shall be consistent in every CMSIS-RTOS. -osStatus osMailPut (osMailQId queue_id, void *mail); - -/// Get a mail from a queue. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out -/// \return event that contains mail information or error code. -/// \note MUST REMAIN UNCHANGED: \b osMailGet shall be consistent in every CMSIS-RTOS. -osEvent osMailGet (osMailQId queue_id, uint32_t millisec); - -/// Free a memory block from a mail. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] mail pointer to the memory block that was obtained with \ref osMailGet. -/// \return status code that indicates the execution status of the function. -/// \note MUST REMAIN UNCHANGED: \b osMailFree shall be consistent in every CMSIS-RTOS. -osStatus osMailFree (osMailQId queue_id, void *mail); - -#endif // Mail Queues available - - -#ifdef __cplusplus -} -#endif - -#endif // _CMSIS_OS_H diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Include/cmsis_os2.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Include/cmsis_os2.h deleted file mode 100644 index 76612e2..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Include/cmsis_os2.h +++ /dev/null @@ -1,756 +0,0 @@ -/* - * Copyright (c) 2013-2020 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 12. June 2020 - * $Revision: V2.1.3 - * - * Project: CMSIS-RTOS2 API - * Title: cmsis_os2.h header file - * - * Version 2.1.3 - * Additional functions allowed to be called from Interrupt Service Routines: - * - osThreadGetId - * Version 2.1.2 - * Additional functions allowed to be called from Interrupt Service Routines: - * - osKernelGetInfo, osKernelGetState - * Version 2.1.1 - * Additional functions allowed to be called from Interrupt Service Routines: - * - osKernelGetTickCount, osKernelGetTickFreq - * Changed Kernel Tick type to uint32_t: - * - updated: osKernelGetTickCount, osDelayUntil - * Version 2.1.0 - * Support for critical and uncritical sections (nesting safe): - * - updated: osKernelLock, osKernelUnlock - * - added: osKernelRestoreLock - * Updated Thread and Event Flags: - * - changed flags parameter and return type from int32_t to uint32_t - * Version 2.0.0 - * Initial Release - *---------------------------------------------------------------------------*/ - -#ifndef CMSIS_OS2_H_ -#define CMSIS_OS2_H_ - -#ifndef __NO_RETURN -#if defined(__CC_ARM) -#define __NO_RETURN __declspec(noreturn) -#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050) -#define __NO_RETURN __attribute__((__noreturn__)) -#elif defined(__GNUC__) -#define __NO_RETURN __attribute__((__noreturn__)) -#elif defined(__ICCARM__) -#define __NO_RETURN __noreturn -#else -#define __NO_RETURN -#endif -#endif - -#include -#include - -#ifdef __cplusplus -extern "C" -{ -#endif - - -// ==== Enumerations, structures, defines ==== - -/// Version information. -typedef struct { - uint32_t api; ///< API version (major.minor.rev: mmnnnrrrr dec). - uint32_t kernel; ///< Kernel version (major.minor.rev: mmnnnrrrr dec). -} osVersion_t; - -/// Kernel state. -typedef enum { - osKernelInactive = 0, ///< Inactive. - osKernelReady = 1, ///< Ready. - osKernelRunning = 2, ///< Running. - osKernelLocked = 3, ///< Locked. - osKernelSuspended = 4, ///< Suspended. - osKernelError = -1, ///< Error. - osKernelReserved = 0x7FFFFFFF ///< Prevents enum down-size compiler optimization. -} osKernelState_t; - -/// Thread state. -typedef enum { - osThreadInactive = 0, ///< Inactive. - osThreadReady = 1, ///< Ready. - osThreadRunning = 2, ///< Running. - osThreadBlocked = 3, ///< Blocked. - osThreadTerminated = 4, ///< Terminated. - osThreadError = -1, ///< Error. - osThreadReserved = 0x7FFFFFFF ///< Prevents enum down-size compiler optimization. -} osThreadState_t; - -/// Priority values. -typedef enum { - osPriorityNone = 0, ///< No priority (not initialized). - osPriorityIdle = 1, ///< Reserved for Idle thread. - osPriorityLow = 8, ///< Priority: low - osPriorityLow1 = 8+1, ///< Priority: low + 1 - osPriorityLow2 = 8+2, ///< Priority: low + 2 - osPriorityLow3 = 8+3, ///< Priority: low + 3 - osPriorityLow4 = 8+4, ///< Priority: low + 4 - osPriorityLow5 = 8+5, ///< Priority: low + 5 - osPriorityLow6 = 8+6, ///< Priority: low + 6 - osPriorityLow7 = 8+7, ///< Priority: low + 7 - osPriorityBelowNormal = 16, ///< Priority: below normal - osPriorityBelowNormal1 = 16+1, ///< Priority: below normal + 1 - osPriorityBelowNormal2 = 16+2, ///< Priority: below normal + 2 - osPriorityBelowNormal3 = 16+3, ///< Priority: below normal + 3 - osPriorityBelowNormal4 = 16+4, ///< Priority: below normal + 4 - osPriorityBelowNormal5 = 16+5, ///< Priority: below normal + 5 - osPriorityBelowNormal6 = 16+6, ///< Priority: below normal + 6 - osPriorityBelowNormal7 = 16+7, ///< Priority: below normal + 7 - osPriorityNormal = 24, ///< Priority: normal - osPriorityNormal1 = 24+1, ///< Priority: normal + 1 - osPriorityNormal2 = 24+2, ///< Priority: normal + 2 - osPriorityNormal3 = 24+3, ///< Priority: normal + 3 - osPriorityNormal4 = 24+4, ///< Priority: normal + 4 - osPriorityNormal5 = 24+5, ///< Priority: normal + 5 - osPriorityNormal6 = 24+6, ///< Priority: normal + 6 - osPriorityNormal7 = 24+7, ///< Priority: normal + 7 - osPriorityAboveNormal = 32, ///< Priority: above normal - osPriorityAboveNormal1 = 32+1, ///< Priority: above normal + 1 - osPriorityAboveNormal2 = 32+2, ///< Priority: above normal + 2 - osPriorityAboveNormal3 = 32+3, ///< Priority: above normal + 3 - osPriorityAboveNormal4 = 32+4, ///< Priority: above normal + 4 - osPriorityAboveNormal5 = 32+5, ///< Priority: above normal + 5 - osPriorityAboveNormal6 = 32+6, ///< Priority: above normal + 6 - osPriorityAboveNormal7 = 32+7, ///< Priority: above normal + 7 - osPriorityHigh = 40, ///< Priority: high - osPriorityHigh1 = 40+1, ///< Priority: high + 1 - osPriorityHigh2 = 40+2, ///< Priority: high + 2 - osPriorityHigh3 = 40+3, ///< Priority: high + 3 - osPriorityHigh4 = 40+4, ///< Priority: high + 4 - osPriorityHigh5 = 40+5, ///< Priority: high + 5 - osPriorityHigh6 = 40+6, ///< Priority: high + 6 - osPriorityHigh7 = 40+7, ///< Priority: high + 7 - osPriorityRealtime = 48, ///< Priority: realtime - osPriorityRealtime1 = 48+1, ///< Priority: realtime + 1 - osPriorityRealtime2 = 48+2, ///< Priority: realtime + 2 - osPriorityRealtime3 = 48+3, ///< Priority: realtime + 3 - osPriorityRealtime4 = 48+4, ///< Priority: realtime + 4 - osPriorityRealtime5 = 48+5, ///< Priority: realtime + 5 - osPriorityRealtime6 = 48+6, ///< Priority: realtime + 6 - osPriorityRealtime7 = 48+7, ///< Priority: realtime + 7 - osPriorityISR = 56, ///< Reserved for ISR deferred thread. - osPriorityError = -1, ///< System cannot determine priority or illegal priority. - osPriorityReserved = 0x7FFFFFFF ///< Prevents enum down-size compiler optimization. -} osPriority_t; - -/// Entry point of a thread. -typedef void (*osThreadFunc_t) (void *argument); - -/// Timer callback function. -typedef void (*osTimerFunc_t) (void *argument); - -/// Timer type. -typedef enum { - osTimerOnce = 0, ///< One-shot timer. - osTimerPeriodic = 1 ///< Repeating timer. -} osTimerType_t; - -// Timeout value. -#define osWaitForever 0xFFFFFFFFU ///< Wait forever timeout value. - -// Flags options (\ref osThreadFlagsWait and \ref osEventFlagsWait). -#define osFlagsWaitAny 0x00000000U ///< Wait for any flag (default). -#define osFlagsWaitAll 0x00000001U ///< Wait for all flags. -#define osFlagsNoClear 0x00000002U ///< Do not clear flags which have been specified to wait for. - -// Flags errors (returned by osThreadFlagsXxxx and osEventFlagsXxxx). -#define osFlagsError 0x80000000U ///< Error indicator. -#define osFlagsErrorUnknown 0xFFFFFFFFU ///< osError (-1). -#define osFlagsErrorTimeout 0xFFFFFFFEU ///< osErrorTimeout (-2). -#define osFlagsErrorResource 0xFFFFFFFDU ///< osErrorResource (-3). -#define osFlagsErrorParameter 0xFFFFFFFCU ///< osErrorParameter (-4). -#define osFlagsErrorISR 0xFFFFFFFAU ///< osErrorISR (-6). - -// Thread attributes (attr_bits in \ref osThreadAttr_t). -#define osThreadDetached 0x00000000U ///< Thread created in detached mode (default) -#define osThreadJoinable 0x00000001U ///< Thread created in joinable mode - -// Mutex attributes (attr_bits in \ref osMutexAttr_t). -#define osMutexRecursive 0x00000001U ///< Recursive mutex. -#define osMutexPrioInherit 0x00000002U ///< Priority inherit protocol. -#define osMutexRobust 0x00000008U ///< Robust mutex. - -/// Status code values returned by CMSIS-RTOS functions. -typedef enum { - osOK = 0, ///< Operation completed successfully. - osError = -1, ///< Unspecified RTOS error: run-time error but no other error message fits. - osErrorTimeout = -2, ///< Operation not completed within the timeout period. - osErrorResource = -3, ///< Resource not available. - osErrorParameter = -4, ///< Parameter error. - osErrorNoMemory = -5, ///< System is out of memory: it was impossible to allocate or reserve memory for the operation. - osErrorISR = -6, ///< Not allowed in ISR context: the function cannot be called from interrupt service routines. - osStatusReserved = 0x7FFFFFFF ///< Prevents enum down-size compiler optimization. -} osStatus_t; - - -/// \details Thread ID identifies the thread. -typedef void *osThreadId_t; - -/// \details Timer ID identifies the timer. -typedef void *osTimerId_t; - -/// \details Event Flags ID identifies the event flags. -typedef void *osEventFlagsId_t; - -/// \details Mutex ID identifies the mutex. -typedef void *osMutexId_t; - -/// \details Semaphore ID identifies the semaphore. -typedef void *osSemaphoreId_t; - -/// \details Memory Pool ID identifies the memory pool. -typedef void *osMemoryPoolId_t; - -/// \details Message Queue ID identifies the message queue. -typedef void *osMessageQueueId_t; - - -#ifndef TZ_MODULEID_T -#define TZ_MODULEID_T -/// \details Data type that identifies secure software modules called by a process. -typedef uint32_t TZ_ModuleId_t; -#endif - - -/// Attributes structure for thread. -typedef struct { - const char *name; ///< name of the thread - uint32_t attr_bits; ///< attribute bits - void *cb_mem; ///< memory for control block - uint32_t cb_size; ///< size of provided memory for control block - void *stack_mem; ///< memory for stack - uint32_t stack_size; ///< size of stack - osPriority_t priority; ///< initial thread priority (default: osPriorityNormal) - TZ_ModuleId_t tz_module; ///< TrustZone module identifier - uint32_t reserved; ///< reserved (must be 0) -} osThreadAttr_t; - -/// Attributes structure for timer. -typedef struct { - const char *name; ///< name of the timer - uint32_t attr_bits; ///< attribute bits - void *cb_mem; ///< memory for control block - uint32_t cb_size; ///< size of provided memory for control block -} osTimerAttr_t; - -/// Attributes structure for event flags. -typedef struct { - const char *name; ///< name of the event flags - uint32_t attr_bits; ///< attribute bits - void *cb_mem; ///< memory for control block - uint32_t cb_size; ///< size of provided memory for control block -} osEventFlagsAttr_t; - -/// Attributes structure for mutex. -typedef struct { - const char *name; ///< name of the mutex - uint32_t attr_bits; ///< attribute bits - void *cb_mem; ///< memory for control block - uint32_t cb_size; ///< size of provided memory for control block -} osMutexAttr_t; - -/// Attributes structure for semaphore. -typedef struct { - const char *name; ///< name of the semaphore - uint32_t attr_bits; ///< attribute bits - void *cb_mem; ///< memory for control block - uint32_t cb_size; ///< size of provided memory for control block -} osSemaphoreAttr_t; - -/// Attributes structure for memory pool. -typedef struct { - const char *name; ///< name of the memory pool - uint32_t attr_bits; ///< attribute bits - void *cb_mem; ///< memory for control block - uint32_t cb_size; ///< size of provided memory for control block - void *mp_mem; ///< memory for data storage - uint32_t mp_size; ///< size of provided memory for data storage -} osMemoryPoolAttr_t; - -/// Attributes structure for message queue. -typedef struct { - const char *name; ///< name of the message queue - uint32_t attr_bits; ///< attribute bits - void *cb_mem; ///< memory for control block - uint32_t cb_size; ///< size of provided memory for control block - void *mq_mem; ///< memory for data storage - uint32_t mq_size; ///< size of provided memory for data storage -} osMessageQueueAttr_t; - - -// ==== Kernel Management Functions ==== - -/// Initialize the RTOS Kernel. -/// \return status code that indicates the execution status of the function. -osStatus_t osKernelInitialize (void); - -/// Get RTOS Kernel Information. -/// \param[out] version pointer to buffer for retrieving version information. -/// \param[out] id_buf pointer to buffer for retrieving kernel identification string. -/// \param[in] id_size size of buffer for kernel identification string. -/// \return status code that indicates the execution status of the function. -osStatus_t osKernelGetInfo (osVersion_t *version, char *id_buf, uint32_t id_size); - -/// Get the current RTOS Kernel state. -/// \return current RTOS Kernel state. -osKernelState_t osKernelGetState (void); - -/// Start the RTOS Kernel scheduler. -/// \return status code that indicates the execution status of the function. -osStatus_t osKernelStart (void); - -/// Lock the RTOS Kernel scheduler. -/// \return previous lock state (1 - locked, 0 - not locked, error code if negative). -int32_t osKernelLock (void); - -/// Unlock the RTOS Kernel scheduler. -/// \return previous lock state (1 - locked, 0 - not locked, error code if negative). -int32_t osKernelUnlock (void); - -/// Restore the RTOS Kernel scheduler lock state. -/// \param[in] lock lock state obtained by \ref osKernelLock or \ref osKernelUnlock. -/// \return new lock state (1 - locked, 0 - not locked, error code if negative). -int32_t osKernelRestoreLock (int32_t lock); - -/// Suspend the RTOS Kernel scheduler. -/// \return time in ticks, for how long the system can sleep or power-down. -uint32_t osKernelSuspend (void); - -/// Resume the RTOS Kernel scheduler. -/// \param[in] sleep_ticks time in ticks for how long the system was in sleep or power-down mode. -void osKernelResume (uint32_t sleep_ticks); - -/// Get the RTOS kernel tick count. -/// \return RTOS kernel current tick count. -uint32_t osKernelGetTickCount (void); - -/// Get the RTOS kernel tick frequency. -/// \return frequency of the kernel tick in hertz, i.e. kernel ticks per second. -uint32_t osKernelGetTickFreq (void); - -/// Get the RTOS kernel system timer count. -/// \return RTOS kernel current system timer count as 32-bit value. -uint32_t osKernelGetSysTimerCount (void); - -/// Get the RTOS kernel system timer frequency. -/// \return frequency of the system timer in hertz, i.e. timer ticks per second. -uint32_t osKernelGetSysTimerFreq (void); - - -// ==== Thread Management Functions ==== - -/// Create a thread and add it to Active Threads. -/// \param[in] func thread function. -/// \param[in] argument pointer that is passed to the thread function as start argument. -/// \param[in] attr thread attributes; NULL: default values. -/// \return thread ID for reference by other functions or NULL in case of error. -osThreadId_t osThreadNew (osThreadFunc_t func, void *argument, const osThreadAttr_t *attr); - -/// Get name of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return name as null-terminated string. -const char *osThreadGetName (osThreadId_t thread_id); - -/// Return the thread ID of the current running thread. -/// \return thread ID for reference by other functions or NULL in case of error. -osThreadId_t osThreadGetId (void); - -/// Get current thread state of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return current thread state of the specified thread. -osThreadState_t osThreadGetState (osThreadId_t thread_id); - -/// Get stack size of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return stack size in bytes. -uint32_t osThreadGetStackSize (osThreadId_t thread_id); - -/// Get available stack space of a thread based on stack watermark recording during execution. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return remaining stack space in bytes. -uint32_t osThreadGetStackSpace (osThreadId_t thread_id); - -/// Change priority of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \param[in] priority new priority value for the thread function. -/// \return status code that indicates the execution status of the function. -osStatus_t osThreadSetPriority (osThreadId_t thread_id, osPriority_t priority); - -/// Get current priority of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return current priority value of the specified thread. -osPriority_t osThreadGetPriority (osThreadId_t thread_id); - -/// Pass control to next thread that is in state \b READY. -/// \return status code that indicates the execution status of the function. -osStatus_t osThreadYield (void); - -/// Suspend execution of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return status code that indicates the execution status of the function. -osStatus_t osThreadSuspend (osThreadId_t thread_id); - -/// Resume execution of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return status code that indicates the execution status of the function. -osStatus_t osThreadResume (osThreadId_t thread_id); - -/// Detach a thread (thread storage can be reclaimed when thread terminates). -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return status code that indicates the execution status of the function. -osStatus_t osThreadDetach (osThreadId_t thread_id); - -/// Wait for specified thread to terminate. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return status code that indicates the execution status of the function. -osStatus_t osThreadJoin (osThreadId_t thread_id); - -/// Terminate execution of current running thread. -__NO_RETURN void osThreadExit (void); - -/// Terminate execution of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \return status code that indicates the execution status of the function. -osStatus_t osThreadTerminate (osThreadId_t thread_id); - -/// Get number of active threads. -/// \return number of active threads. -uint32_t osThreadGetCount (void); - -/// Enumerate active threads. -/// \param[out] thread_array pointer to array for retrieving thread IDs. -/// \param[in] array_items maximum number of items in array for retrieving thread IDs. -/// \return number of enumerated threads. -uint32_t osThreadEnumerate (osThreadId_t *thread_array, uint32_t array_items); - - -// ==== Thread Flags Functions ==== - -/// Set the specified Thread Flags of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadNew or \ref osThreadGetId. -/// \param[in] flags specifies the flags of the thread that shall be set. -/// \return thread flags after setting or error code if highest bit set. -uint32_t osThreadFlagsSet (osThreadId_t thread_id, uint32_t flags); - -/// Clear the specified Thread Flags of current running thread. -/// \param[in] flags specifies the flags of the thread that shall be cleared. -/// \return thread flags before clearing or error code if highest bit set. -uint32_t osThreadFlagsClear (uint32_t flags); - -/// Get the current Thread Flags of current running thread. -/// \return current thread flags. -uint32_t osThreadFlagsGet (void); - -/// Wait for one or more Thread Flags of the current running thread to become signaled. -/// \param[in] flags specifies the flags to wait for. -/// \param[in] options specifies flags options (osFlagsXxxx). -/// \param[in] timeout \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return thread flags before clearing or error code if highest bit set. -uint32_t osThreadFlagsWait (uint32_t flags, uint32_t options, uint32_t timeout); - - -// ==== Generic Wait Functions ==== - -/// Wait for Timeout (Time Delay). -/// \param[in] ticks \ref CMSIS_RTOS_TimeOutValue "time ticks" value -/// \return status code that indicates the execution status of the function. -osStatus_t osDelay (uint32_t ticks); - -/// Wait until specified time. -/// \param[in] ticks absolute time in ticks -/// \return status code that indicates the execution status of the function. -osStatus_t osDelayUntil (uint32_t ticks); - - -// ==== Timer Management Functions ==== - -/// Create and Initialize a timer. -/// \param[in] func function pointer to callback function. -/// \param[in] type \ref osTimerOnce for one-shot or \ref osTimerPeriodic for periodic behavior. -/// \param[in] argument argument to the timer callback function. -/// \param[in] attr timer attributes; NULL: default values. -/// \return timer ID for reference by other functions or NULL in case of error. -osTimerId_t osTimerNew (osTimerFunc_t func, osTimerType_t type, void *argument, const osTimerAttr_t *attr); - -/// Get name of a timer. -/// \param[in] timer_id timer ID obtained by \ref osTimerNew. -/// \return name as null-terminated string. -const char *osTimerGetName (osTimerId_t timer_id); - -/// Start or restart a timer. -/// \param[in] timer_id timer ID obtained by \ref osTimerNew. -/// \param[in] ticks \ref CMSIS_RTOS_TimeOutValue "time ticks" value of the timer. -/// \return status code that indicates the execution status of the function. -osStatus_t osTimerStart (osTimerId_t timer_id, uint32_t ticks); - -/// Stop a timer. -/// \param[in] timer_id timer ID obtained by \ref osTimerNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osTimerStop (osTimerId_t timer_id); - -/// Check if a timer is running. -/// \param[in] timer_id timer ID obtained by \ref osTimerNew. -/// \return 0 not running, 1 running. -uint32_t osTimerIsRunning (osTimerId_t timer_id); - -/// Delete a timer. -/// \param[in] timer_id timer ID obtained by \ref osTimerNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osTimerDelete (osTimerId_t timer_id); - - -// ==== Event Flags Management Functions ==== - -/// Create and Initialize an Event Flags object. -/// \param[in] attr event flags attributes; NULL: default values. -/// \return event flags ID for reference by other functions or NULL in case of error. -osEventFlagsId_t osEventFlagsNew (const osEventFlagsAttr_t *attr); - -/// Get name of an Event Flags object. -/// \param[in] ef_id event flags ID obtained by \ref osEventFlagsNew. -/// \return name as null-terminated string. -const char *osEventFlagsGetName (osEventFlagsId_t ef_id); - -/// Set the specified Event Flags. -/// \param[in] ef_id event flags ID obtained by \ref osEventFlagsNew. -/// \param[in] flags specifies the flags that shall be set. -/// \return event flags after setting or error code if highest bit set. -uint32_t osEventFlagsSet (osEventFlagsId_t ef_id, uint32_t flags); - -/// Clear the specified Event Flags. -/// \param[in] ef_id event flags ID obtained by \ref osEventFlagsNew. -/// \param[in] flags specifies the flags that shall be cleared. -/// \return event flags before clearing or error code if highest bit set. -uint32_t osEventFlagsClear (osEventFlagsId_t ef_id, uint32_t flags); - -/// Get the current Event Flags. -/// \param[in] ef_id event flags ID obtained by \ref osEventFlagsNew. -/// \return current event flags. -uint32_t osEventFlagsGet (osEventFlagsId_t ef_id); - -/// Wait for one or more Event Flags to become signaled. -/// \param[in] ef_id event flags ID obtained by \ref osEventFlagsNew. -/// \param[in] flags specifies the flags to wait for. -/// \param[in] options specifies flags options (osFlagsXxxx). -/// \param[in] timeout \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return event flags before clearing or error code if highest bit set. -uint32_t osEventFlagsWait (osEventFlagsId_t ef_id, uint32_t flags, uint32_t options, uint32_t timeout); - -/// Delete an Event Flags object. -/// \param[in] ef_id event flags ID obtained by \ref osEventFlagsNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osEventFlagsDelete (osEventFlagsId_t ef_id); - - -// ==== Mutex Management Functions ==== - -/// Create and Initialize a Mutex object. -/// \param[in] attr mutex attributes; NULL: default values. -/// \return mutex ID for reference by other functions or NULL in case of error. -osMutexId_t osMutexNew (const osMutexAttr_t *attr); - -/// Get name of a Mutex object. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexNew. -/// \return name as null-terminated string. -const char *osMutexGetName (osMutexId_t mutex_id); - -/// Acquire a Mutex or timeout if it is locked. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexNew. -/// \param[in] timeout \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return status code that indicates the execution status of the function. -osStatus_t osMutexAcquire (osMutexId_t mutex_id, uint32_t timeout); - -/// Release a Mutex that was acquired by \ref osMutexAcquire. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osMutexRelease (osMutexId_t mutex_id); - -/// Get Thread which owns a Mutex object. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexNew. -/// \return thread ID of owner thread or NULL when mutex was not acquired. -osThreadId_t osMutexGetOwner (osMutexId_t mutex_id); - -/// Delete a Mutex object. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osMutexDelete (osMutexId_t mutex_id); - - -// ==== Semaphore Management Functions ==== - -/// Create and Initialize a Semaphore object. -/// \param[in] max_count maximum number of available tokens. -/// \param[in] initial_count initial number of available tokens. -/// \param[in] attr semaphore attributes; NULL: default values. -/// \return semaphore ID for reference by other functions or NULL in case of error. -osSemaphoreId_t osSemaphoreNew (uint32_t max_count, uint32_t initial_count, const osSemaphoreAttr_t *attr); - -/// Get name of a Semaphore object. -/// \param[in] semaphore_id semaphore ID obtained by \ref osSemaphoreNew. -/// \return name as null-terminated string. -const char *osSemaphoreGetName (osSemaphoreId_t semaphore_id); - -/// Acquire a Semaphore token or timeout if no tokens are available. -/// \param[in] semaphore_id semaphore ID obtained by \ref osSemaphoreNew. -/// \param[in] timeout \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return status code that indicates the execution status of the function. -osStatus_t osSemaphoreAcquire (osSemaphoreId_t semaphore_id, uint32_t timeout); - -/// Release a Semaphore token up to the initial maximum count. -/// \param[in] semaphore_id semaphore ID obtained by \ref osSemaphoreNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osSemaphoreRelease (osSemaphoreId_t semaphore_id); - -/// Get current Semaphore token count. -/// \param[in] semaphore_id semaphore ID obtained by \ref osSemaphoreNew. -/// \return number of tokens available. -uint32_t osSemaphoreGetCount (osSemaphoreId_t semaphore_id); - -/// Delete a Semaphore object. -/// \param[in] semaphore_id semaphore ID obtained by \ref osSemaphoreNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osSemaphoreDelete (osSemaphoreId_t semaphore_id); - - -// ==== Memory Pool Management Functions ==== - -/// Create and Initialize a Memory Pool object. -/// \param[in] block_count maximum number of memory blocks in memory pool. -/// \param[in] block_size memory block size in bytes. -/// \param[in] attr memory pool attributes; NULL: default values. -/// \return memory pool ID for reference by other functions or NULL in case of error. -osMemoryPoolId_t osMemoryPoolNew (uint32_t block_count, uint32_t block_size, const osMemoryPoolAttr_t *attr); - -/// Get name of a Memory Pool object. -/// \param[in] mp_id memory pool ID obtained by \ref osMemoryPoolNew. -/// \return name as null-terminated string. -const char *osMemoryPoolGetName (osMemoryPoolId_t mp_id); - -/// Allocate a memory block from a Memory Pool. -/// \param[in] mp_id memory pool ID obtained by \ref osMemoryPoolNew. -/// \param[in] timeout \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return address of the allocated memory block or NULL in case of no memory is available. -void *osMemoryPoolAlloc (osMemoryPoolId_t mp_id, uint32_t timeout); - -/// Return an allocated memory block back to a Memory Pool. -/// \param[in] mp_id memory pool ID obtained by \ref osMemoryPoolNew. -/// \param[in] block address of the allocated memory block to be returned to the memory pool. -/// \return status code that indicates the execution status of the function. -osStatus_t osMemoryPoolFree (osMemoryPoolId_t mp_id, void *block); - -/// Get maximum number of memory blocks in a Memory Pool. -/// \param[in] mp_id memory pool ID obtained by \ref osMemoryPoolNew. -/// \return maximum number of memory blocks. -uint32_t osMemoryPoolGetCapacity (osMemoryPoolId_t mp_id); - -/// Get memory block size in a Memory Pool. -/// \param[in] mp_id memory pool ID obtained by \ref osMemoryPoolNew. -/// \return memory block size in bytes. -uint32_t osMemoryPoolGetBlockSize (osMemoryPoolId_t mp_id); - -/// Get number of memory blocks used in a Memory Pool. -/// \param[in] mp_id memory pool ID obtained by \ref osMemoryPoolNew. -/// \return number of memory blocks used. -uint32_t osMemoryPoolGetCount (osMemoryPoolId_t mp_id); - -/// Get number of memory blocks available in a Memory Pool. -/// \param[in] mp_id memory pool ID obtained by \ref osMemoryPoolNew. -/// \return number of memory blocks available. -uint32_t osMemoryPoolGetSpace (osMemoryPoolId_t mp_id); - -/// Delete a Memory Pool object. -/// \param[in] mp_id memory pool ID obtained by \ref osMemoryPoolNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osMemoryPoolDelete (osMemoryPoolId_t mp_id); - - -// ==== Message Queue Management Functions ==== - -/// Create and Initialize a Message Queue object. -/// \param[in] msg_count maximum number of messages in queue. -/// \param[in] msg_size maximum message size in bytes. -/// \param[in] attr message queue attributes; NULL: default values. -/// \return message queue ID for reference by other functions or NULL in case of error. -osMessageQueueId_t osMessageQueueNew (uint32_t msg_count, uint32_t msg_size, const osMessageQueueAttr_t *attr); - -/// Get name of a Message Queue object. -/// \param[in] mq_id message queue ID obtained by \ref osMessageQueueNew. -/// \return name as null-terminated string. -const char *osMessageQueueGetName (osMessageQueueId_t mq_id); - -/// Put a Message into a Queue or timeout if Queue is full. -/// \param[in] mq_id message queue ID obtained by \ref osMessageQueueNew. -/// \param[in] msg_ptr pointer to buffer with message to put into a queue. -/// \param[in] msg_prio message priority. -/// \param[in] timeout \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return status code that indicates the execution status of the function. -osStatus_t osMessageQueuePut (osMessageQueueId_t mq_id, const void *msg_ptr, uint8_t msg_prio, uint32_t timeout); - -/// Get a Message from a Queue or timeout if Queue is empty. -/// \param[in] mq_id message queue ID obtained by \ref osMessageQueueNew. -/// \param[out] msg_ptr pointer to buffer for message to get from a queue. -/// \param[out] msg_prio pointer to buffer for message priority or NULL. -/// \param[in] timeout \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return status code that indicates the execution status of the function. -osStatus_t osMessageQueueGet (osMessageQueueId_t mq_id, void *msg_ptr, uint8_t *msg_prio, uint32_t timeout); - -/// Get maximum number of messages in a Message Queue. -/// \param[in] mq_id message queue ID obtained by \ref osMessageQueueNew. -/// \return maximum number of messages. -uint32_t osMessageQueueGetCapacity (osMessageQueueId_t mq_id); - -/// Get maximum message size in a Message Queue. -/// \param[in] mq_id message queue ID obtained by \ref osMessageQueueNew. -/// \return maximum message size in bytes. -uint32_t osMessageQueueGetMsgSize (osMessageQueueId_t mq_id); - -/// Get number of queued messages in a Message Queue. -/// \param[in] mq_id message queue ID obtained by \ref osMessageQueueNew. -/// \return number of queued messages. -uint32_t osMessageQueueGetCount (osMessageQueueId_t mq_id); - -/// Get number of available slots for messages in a Message Queue. -/// \param[in] mq_id message queue ID obtained by \ref osMessageQueueNew. -/// \return number of available slots for messages. -uint32_t osMessageQueueGetSpace (osMessageQueueId_t mq_id); - -/// Reset a Message Queue to initial empty state. -/// \param[in] mq_id message queue ID obtained by \ref osMessageQueueNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osMessageQueueReset (osMessageQueueId_t mq_id); - -/// Delete a Message Queue object. -/// \param[in] mq_id message queue ID obtained by \ref osMessageQueueNew. -/// \return status code that indicates the execution status of the function. -osStatus_t osMessageQueueDelete (osMessageQueueId_t mq_id); - - -#ifdef __cplusplus -} -#endif - -#endif // CMSIS_OS2_H_ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Include/os_tick.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Include/os_tick.h deleted file mode 100644 index 3cfd895..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Include/os_tick.h +++ /dev/null @@ -1,80 +0,0 @@ -/**************************************************************************//** - * @file os_tick.h - * @brief CMSIS OS Tick header file - * @version V1.0.2 - * @date 19. March 2021 - ******************************************************************************/ -/* - * Copyright (c) 2017-2021 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#ifndef OS_TICK_H -#define OS_TICK_H - -#include - -#ifdef __cplusplus -extern "C" -{ -#endif - -/// IRQ Handler. -#ifndef IRQHANDLER_T -#define IRQHANDLER_T -typedef void (*IRQHandler_t) (void); -#endif - -/// Setup OS Tick timer to generate periodic RTOS Kernel Ticks -/// \param[in] freq tick frequency in Hz -/// \param[in] handler tick IRQ handler -/// \return 0 on success, -1 on error. -int32_t OS_Tick_Setup (uint32_t freq, IRQHandler_t handler); - -/// Enable OS Tick timer interrupt -void OS_Tick_Enable (void); - -/// Disable OS Tick timer interrupt -void OS_Tick_Disable (void); - -/// Acknowledge execution of OS Tick timer interrupt -void OS_Tick_AcknowledgeIRQ (void); - -/// Get OS Tick timer IRQ number -/// \return OS Tick IRQ number -int32_t OS_Tick_GetIRQn (void); - -/// Get OS Tick timer clock frequency -/// \return OS Tick timer clock frequency in Hz -uint32_t OS_Tick_GetClock (void); - -/// Get OS Tick timer interval reload value -/// \return OS Tick timer interval reload value -uint32_t OS_Tick_GetInterval (void); - -/// Get OS Tick timer counter value -/// \return OS Tick timer counter value -uint32_t OS_Tick_GetCount (void); - -/// Get OS Tick timer overflow status -/// \return OS Tick overflow status (1 - overflow, 0 - no overflow). -uint32_t OS_Tick_GetOverflow (void); - -#ifdef __cplusplus -} -#endif - -#endif /* OS_TICK_H */ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_systick.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_systick.c deleted file mode 100644 index 3cce53c..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_systick.c +++ /dev/null @@ -1,133 +0,0 @@ -/**************************************************************************//** - * @file os_systick.c - * @brief CMSIS OS Tick SysTick implementation - * @version V1.0.3 - * @date 19. March 2021 - ******************************************************************************/ -/* - * Copyright (c) 2017-2021 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "os_tick.h" - -//lint -emacro((923,9078),SCB,SysTick) "cast from unsigned long to pointer" -#include "RTE_Components.h" -#include CMSIS_device_header - -#ifdef SysTick - -#ifndef SYSTICK_IRQ_PRIORITY -#define SYSTICK_IRQ_PRIORITY 0xFFU -#endif - -static uint8_t PendST __attribute__((section(".bss.os"))); - -// Setup OS Tick. -__WEAK int32_t OS_Tick_Setup (uint32_t freq, IRQHandler_t handler) { - uint32_t load; - (void)handler; - - if (freq == 0U) { - //lint -e{904} "Return statement before end of function" - return (-1); - } - - load = (SystemCoreClock / freq) - 1U; - if (load > 0x00FFFFFFU) { - //lint -e{904} "Return statement before end of function" - return (-1); - } - - // Set SysTick Interrupt Priority -#if ((defined(__ARM_ARCH_8M_MAIN__) && (__ARM_ARCH_8M_MAIN__ != 0)) || \ - (defined(__ARM_ARCH_8_1M_MAIN__) && (__ARM_ARCH_8_1M_MAIN__ != 0)) || \ - (defined(__CORTEX_M) && (__CORTEX_M == 7U))) - SCB->SHPR[11] = SYSTICK_IRQ_PRIORITY; -#elif (defined(__ARM_ARCH_8M_BASE__) && (__ARM_ARCH_8M_BASE__ != 0)) - SCB->SHPR[1] |= ((uint32_t)SYSTICK_IRQ_PRIORITY << 24); -#elif ((defined(__ARM_ARCH_7M__) && (__ARM_ARCH_7M__ != 0)) || \ - (defined(__ARM_ARCH_7EM__) && (__ARM_ARCH_7EM__ != 0))) - SCB->SHP[11] = SYSTICK_IRQ_PRIORITY; -#elif (defined(__ARM_ARCH_6M__) && (__ARM_ARCH_6M__ != 0)) - SCB->SHP[1] |= ((uint32_t)SYSTICK_IRQ_PRIORITY << 24); -#else -#error "Unknown ARM Core!" -#endif - - SysTick->CTRL = SysTick_CTRL_CLKSOURCE_Msk | SysTick_CTRL_TICKINT_Msk; - SysTick->LOAD = load; - SysTick->VAL = 0U; - - PendST = 0U; - - return (0); -} - -/// Enable OS Tick. -__WEAK void OS_Tick_Enable (void) { - - if (PendST != 0U) { - PendST = 0U; - SCB->ICSR = SCB_ICSR_PENDSTSET_Msk; - } - - SysTick->CTRL |= SysTick_CTRL_ENABLE_Msk; -} - -/// Disable OS Tick. -__WEAK void OS_Tick_Disable (void) { - - SysTick->CTRL &= ~SysTick_CTRL_ENABLE_Msk; - - if ((SCB->ICSR & SCB_ICSR_PENDSTSET_Msk) != 0U) { - SCB->ICSR = SCB_ICSR_PENDSTCLR_Msk; - PendST = 1U; - } -} - -// Acknowledge OS Tick IRQ. -__WEAK void OS_Tick_AcknowledgeIRQ (void) { - (void)SysTick->CTRL; -} - -// Get OS Tick IRQ number. -__WEAK int32_t OS_Tick_GetIRQn (void) { - return ((int32_t)SysTick_IRQn); -} - -// Get OS Tick clock. -__WEAK uint32_t OS_Tick_GetClock (void) { - return (SystemCoreClock); -} - -// Get OS Tick interval. -__WEAK uint32_t OS_Tick_GetInterval (void) { - return (SysTick->LOAD + 1U); -} - -// Get OS Tick count value. -__WEAK uint32_t OS_Tick_GetCount (void) { - uint32_t load = SysTick->LOAD; - return (load - SysTick->VAL); -} - -// Get OS Tick overflow status. -__WEAK uint32_t OS_Tick_GetOverflow (void) { - return ((SCB->ICSR & SCB_ICSR_PENDSTSET_Msk) >> SCB_ICSR_PENDSTSET_Pos); -} - -#endif // SysTick diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_tick_gtim.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_tick_gtim.c deleted file mode 100644 index 22cfa93..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_tick_gtim.c +++ /dev/null @@ -1,187 +0,0 @@ -/**************************************************************************//** - * @file os_tick_gtim.c - * @brief CMSIS OS Tick implementation for Generic Timer - * @version V1.0.1 - * @date 24. November 2017 - ******************************************************************************/ -/* - * Copyright (c) 2017 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "os_tick.h" -#include "irq_ctrl.h" - -#include "RTE_Components.h" -#include CMSIS_device_header - -#ifndef GTIM_IRQ_PRIORITY -#define GTIM_IRQ_PRIORITY 0xFFU -#endif - -#ifndef GTIM_IRQ_NUM -#define GTIM_IRQ_NUM SecurePhyTimer_IRQn -#endif - -// Timer interrupt pending flag -static uint8_t GTIM_PendIRQ; - -// Timer tick frequency -static uint32_t GTIM_Clock; - -// Timer load value -static uint32_t GTIM_Load; - -// Setup OS Tick. -int32_t OS_Tick_Setup (uint32_t freq, IRQHandler_t handler) { - uint32_t prio, bits; - - if (freq == 0U) { - return (-1); - } - - GTIM_PendIRQ = 0U; - - // Get timer clock -#ifdef SCTR_BASE - GTIM_Clock = *(uint32_t*)(SCTR_BASE+0x20); -#else - // FVP REFCLK CNTControl 100MHz - GTIM_Clock = 100000000UL; -#endif - - PL1_SetCounterFrequency(GTIM_Clock); - - // Calculate load value - GTIM_Load = (GTIM_Clock / freq) - 1U; - - // Disable Generic Timer and set load value - PL1_SetControl(0U); - PL1_SetLoadValue(GTIM_Load); - - // Disable corresponding IRQ - IRQ_Disable(GTIM_IRQ_NUM); - IRQ_ClearPending(GTIM_IRQ_NUM); - - // Determine number of implemented priority bits - IRQ_SetPriority(GTIM_IRQ_NUM, 0xFFU); - - prio = IRQ_GetPriority(GTIM_IRQ_NUM); - - // At least bits [7:4] must be implemented - if ((prio & 0xF0U) == 0U) { - return (-1); - } - - for (bits = 0; bits < 4; bits++) { - if ((prio & 0x01) != 0) { - break; - } - prio >>= 1; - } - - // Adjust configured priority to the number of implemented priority bits - prio = (GTIM_IRQ_PRIORITY << bits) & 0xFFUL; - - // Set Private Timer interrupt priority - IRQ_SetPriority(GTIM_IRQ_NUM, prio-1U); - - // Set edge-triggered IRQ - IRQ_SetMode(GTIM_IRQ_NUM, IRQ_MODE_TRIG_EDGE); - - // Register tick interrupt handler function - IRQ_SetHandler(GTIM_IRQ_NUM, handler); - - // Enable corresponding interrupt - IRQ_Enable(GTIM_IRQ_NUM); - - // Enable system counter and timer control -#ifdef SCTR_BASE - *(uint32_t*)SCTR_BASE |= 3U; -#endif - - // Enable timer control - PL1_SetControl(1U); - - return (0); -} - -/// Enable OS Tick. -void OS_Tick_Enable (void) { - uint32_t ctrl; - - // Set pending interrupt if flag set - if (GTIM_PendIRQ != 0U) { - GTIM_PendIRQ = 0U; - IRQ_SetPending (GTIM_IRQ_NUM); - } - - // Start the Private Timer - ctrl = PL1_GetControl(); - // Set bit: Timer enable - ctrl |= 1U; - PL1_SetControl(ctrl); -} - -/// Disable OS Tick. -void OS_Tick_Disable (void) { - uint32_t ctrl; - - // Stop the Private Timer - ctrl = PL1_GetControl(); - // Clear bit: Timer enable - ctrl &= ~1U; - PL1_SetControl(ctrl); - - // Remember pending interrupt flag - if (IRQ_GetPending(GTIM_IRQ_NUM) != 0) { - IRQ_ClearPending(GTIM_IRQ_NUM); - GTIM_PendIRQ = 1U; - } -} - -// Acknowledge OS Tick IRQ. -void OS_Tick_AcknowledgeIRQ (void) { - IRQ_ClearPending (GTIM_IRQ_NUM); - PL1_SetLoadValue(GTIM_Load); -} - -// Get OS Tick IRQ number. -int32_t OS_Tick_GetIRQn (void) { - return (GTIM_IRQ_NUM); -} - -// Get OS Tick clock. -uint32_t OS_Tick_GetClock (void) { - return (GTIM_Clock); -} - -// Get OS Tick interval. -uint32_t OS_Tick_GetInterval (void) { - return (GTIM_Load + 1U); -} - -// Get OS Tick count value. -uint32_t OS_Tick_GetCount (void) { - return (GTIM_Load - PL1_GetCurrentValue()); -} - -// Get OS Tick overflow status. -uint32_t OS_Tick_GetOverflow (void) { - CNTP_CTL_Type cntp_ctl; - cntp_ctl.w = PL1_GetControl(); - return (cntp_ctl.b.ISTATUS); -} diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_tick_ptim.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_tick_ptim.c deleted file mode 100644 index e75ac3a..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Source/os_tick_ptim.c +++ /dev/null @@ -1,165 +0,0 @@ -/**************************************************************************//** - * @file os_tick_ptim.c - * @brief CMSIS OS Tick implementation for Private Timer - * @version V1.0.2 - * @date 02. March 2018 - ******************************************************************************/ -/* - * Copyright (c) 2017-2018 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "RTE_Components.h" -#include CMSIS_device_header - -#if defined(PTIM) - -#include "os_tick.h" -#include "irq_ctrl.h" - -#ifndef PTIM_IRQ_PRIORITY -#define PTIM_IRQ_PRIORITY 0xFFU -#endif - -static uint8_t PTIM_PendIRQ; // Timer interrupt pending flag - -// Setup OS Tick. -int32_t OS_Tick_Setup (uint32_t freq, IRQHandler_t handler) { - uint32_t load; - uint32_t prio; - uint32_t bits; - - if (freq == 0U) { - return (-1); - } - - PTIM_PendIRQ = 0U; - - // Private Timer runs with the system frequency - load = (SystemCoreClock / freq) - 1U; - - // Disable Private Timer and set load value - PTIM_SetControl (0U); - PTIM_SetLoadValue (load); - - // Disable corresponding IRQ - IRQ_Disable (PrivTimer_IRQn); - IRQ_ClearPending(PrivTimer_IRQn); - - // Determine number of implemented priority bits - IRQ_SetPriority (PrivTimer_IRQn, 0xFFU); - - prio = IRQ_GetPriority (PrivTimer_IRQn); - - // At least bits [7:4] must be implemented - if ((prio & 0xF0U) == 0U) { - return (-1); - } - - for (bits = 0; bits < 4; bits++) { - if ((prio & 0x01) != 0) { - break; - } - prio >>= 1; - } - - // Adjust configured priority to the number of implemented priority bits - prio = (PTIM_IRQ_PRIORITY << bits) & 0xFFUL; - - // Set Private Timer interrupt priority - IRQ_SetPriority(PrivTimer_IRQn, prio-1U); - - // Set edge-triggered IRQ - IRQ_SetMode(PrivTimer_IRQn, IRQ_MODE_TRIG_EDGE); - - // Register tick interrupt handler function - IRQ_SetHandler(PrivTimer_IRQn, handler); - - // Enable corresponding interrupt - IRQ_Enable (PrivTimer_IRQn); - - // Set bits: IRQ enable and Auto reload - PTIM_SetControl (0x06U); - - return (0); -} - -/// Enable OS Tick. -void OS_Tick_Enable (void) { - uint32_t ctrl; - - // Set pending interrupt if flag set - if (PTIM_PendIRQ != 0U) { - PTIM_PendIRQ = 0U; - IRQ_SetPending (PrivTimer_IRQn); - } - - // Start the Private Timer - ctrl = PTIM_GetControl(); - // Set bit: Timer enable - ctrl |= 1U; - PTIM_SetControl (ctrl); -} - -/// Disable OS Tick. -void OS_Tick_Disable (void) { - uint32_t ctrl; - - // Stop the Private Timer - ctrl = PTIM_GetControl(); - // Clear bit: Timer enable - ctrl &= ~1U; - PTIM_SetControl (ctrl); - - // Remember pending interrupt flag - if (IRQ_GetPending(PrivTimer_IRQn) != 0) { - IRQ_ClearPending (PrivTimer_IRQn); - PTIM_PendIRQ = 1U; - } -} - -// Acknowledge OS Tick IRQ. -void OS_Tick_AcknowledgeIRQ (void) { - PTIM_ClearEventFlag(); -} - -// Get OS Tick IRQ number. -int32_t OS_Tick_GetIRQn (void) { - return (PrivTimer_IRQn); -} - -// Get OS Tick clock. -uint32_t OS_Tick_GetClock (void) { - return (SystemCoreClock); -} - -// Get OS Tick interval. -uint32_t OS_Tick_GetInterval (void) { - return (PTIM_GetLoadValue() + 1U); -} - -// Get OS Tick count value. -uint32_t OS_Tick_GetCount (void) { - uint32_t load = PTIM_GetLoadValue(); - return (load - PTIM_GetCurrentValue()); -} - -// Get OS Tick overflow status. -uint32_t OS_Tick_GetOverflow (void) { - return (PTIM->ISR & 1); -} - -#endif // PTIM diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Template/cmsis_os.h b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Template/cmsis_os.h deleted file mode 100644 index 376dbf7..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Template/cmsis_os.h +++ /dev/null @@ -1,922 +0,0 @@ -/* - * Copyright (c) 2013-2018 Arm Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 18. June 2018 - * $Revision: V2.1.3 - * - * Project: CMSIS-RTOS API - * Title: cmsis_os.h template header file - * - * Version 0.02 - * Initial Proposal Phase - * Version 0.03 - * osKernelStart added, optional feature: main started as thread - * osSemaphores have standard behavior - * osTimerCreate does not start the timer, added osTimerStart - * osThreadPass is renamed to osThreadYield - * Version 1.01 - * Support for C++ interface - * - const attribute removed from the osXxxxDef_t typedefs - * - const attribute added to the osXxxxDef macros - * Added: osTimerDelete, osMutexDelete, osSemaphoreDelete - * Added: osKernelInitialize - * Version 1.02 - * Control functions for short timeouts in microsecond resolution: - * Added: osKernelSysTick, osKernelSysTickFrequency, osKernelSysTickMicroSec - * Removed: osSignalGet - * Version 2.0.0 - * OS objects creation without macros (dynamic creation and resource allocation): - * - added: osXxxxNew functions which replace osXxxxCreate - * - added: osXxxxAttr_t structures - * - deprecated: osXxxxCreate functions, osXxxxDef_t structures - * - deprecated: osXxxxDef and osXxxx macros - * osStatus codes simplified and renamed to osStatus_t - * osEvent return structure deprecated - * Kernel: - * - added: osKernelInfo_t and osKernelGetInfo - * - added: osKernelState_t and osKernelGetState (replaces osKernelRunning) - * - added: osKernelLock, osKernelUnlock - * - added: osKernelSuspend, osKernelResume - * - added: osKernelGetTickCount, osKernelGetTickFreq - * - renamed osKernelSysTick to osKernelGetSysTimerCount - * - replaced osKernelSysTickFrequency with osKernelGetSysTimerFreq - * - deprecated osKernelSysTickMicroSec - * Thread: - * - extended number of thread priorities - * - renamed osPrioriry to osPrioriry_t - * - replaced osThreadCreate with osThreadNew - * - added: osThreadGetName - * - added: osThreadState_t and osThreadGetState - * - added: osThreadGetStackSize, osThreadGetStackSpace - * - added: osThreadSuspend, osThreadResume - * - added: osThreadJoin, osThreadDetach, osThreadExit - * - added: osThreadGetCount, osThreadEnumerate - * - added: Thread Flags (moved from Signals) - * Signals: - * - renamed osSignals to osThreadFlags (moved to Thread Flags) - * - changed return value of Set/Clear/Wait functions - * - Clear function limited to current running thread - * - extended Wait function (options) - * - added: osThreadFlagsGet - * Event Flags: - * - added new independent object for handling Event Flags - * Delay and Wait functions: - * - added: osDelayUntil - * - deprecated: osWait - * Timer: - * - replaced osTimerCreate with osTimerNew - * - added: osTimerGetName, osTimerIsRunning - * Mutex: - * - extended: attributes (Recursive, Priority Inherit, Robust) - * - replaced osMutexCreate with osMutexNew - * - renamed osMutexWait to osMutexAcquire - * - added: osMutexGetName, osMutexGetOwner - * Semaphore: - * - extended: maximum and initial token count - * - replaced osSemaphoreCreate with osSemaphoreNew - * - renamed osSemaphoreWait to osSemaphoreAcquire (changed return value) - * - added: osSemaphoreGetName, osSemaphoreGetCount - * Memory Pool: - * - using osMemoryPool prefix instead of osPool - * - replaced osPoolCreate with osMemoryPoolNew - * - extended osMemoryPoolAlloc (timeout) - * - added: osMemoryPoolGetName - * - added: osMemoryPoolGetCapacity, osMemoryPoolGetBlockSize - * - added: osMemoryPoolGetCount, osMemoryPoolGetSpace - * - added: osMemoryPoolDelete - * - deprecated: osPoolCAlloc - * Message Queue: - * - extended: fixed size message instead of a single 32-bit value - * - using osMessageQueue prefix instead of osMessage - * - replaced osMessageCreate with osMessageQueueNew - * - updated: osMessageQueuePut, osMessageQueueGet - * - added: osMessageQueueGetName - * - added: osMessageQueueGetCapacity, osMessageQueueGetMsgSize - * - added: osMessageQueueGetCount, osMessageQueueGetSpace - * - added: osMessageQueueReset, osMessageQueueDelete - * Mail Queue: - * - deprecated (superseded by extended Message Queue functionality) - * Version 2.1.0 - * Support for critical and uncritical sections (nesting safe): - * - updated: osKernelLock, osKernelUnlock - * - added: osKernelRestoreLock - * Updated Thread and Event Flags: - * - changed flags parameter and return type from int32_t to uint32_t - * Version 2.1.1 - * Additional functions allowed to be called from Interrupt Service Routines: - * - osKernelGetTickCount, osKernelGetTickFreq - * Changed Kernel Tick type to uint32_t: - * - updated: osKernelGetTickCount, osDelayUntil - * Version 2.1.2 - * Additional functions allowed to be called from Interrupt Service Routines: - * - osKernelGetInfo, osKernelGetState - * Version 2.1.3 - * Additional functions allowed to be called from Interrupt Service Routines: - * - osThreadGetId - *---------------------------------------------------------------------------*/ - -#ifndef CMSIS_OS_H_ -#define CMSIS_OS_H_ - -/// \b osCMSIS identifies the CMSIS-RTOS API version. -#define osCMSIS 0x20001U ///< API version (main[31:16].sub[15:0]) - -/// \note CAN BE CHANGED: \b osCMSIS_KERNEL identifies the underlying RTOS kernel and version number. -#define osCMSIS_KERNEL 0x10000U ///< RTOS identification and version (main[31:16].sub[15:0]) - -/// \note CAN BE CHANGED: \b osKernelSystemId identifies the underlying RTOS kernel. -#define osKernelSystemId "KERNEL V1.0" ///< RTOS identification string - -/// \note CAN BE CHANGED: \b osFeature_xxx identifies RTOS features. -#define osFeature_MainThread 0 ///< main thread 1=main can be thread, 0=not available -#define osFeature_Signals 16U ///< maximum number of Signal Flags available per thread -#define osFeature_Semaphore 65535U ///< maximum count for \ref osSemaphoreCreate function -#define osFeature_Wait 0 ///< osWait function: 1=available, 0=not available -#define osFeature_SysTick 1 ///< osKernelSysTick functions: 1=available, 0=not available -#define osFeature_Pool 1 ///< Memory Pools: 1=available, 0=not available -#define osFeature_MessageQ 1 ///< Message Queues: 1=available, 0=not available -#define osFeature_MailQ 1 ///< Mail Queues: 1=available, 0=not available - -#if (osCMSIS >= 0x20000U) -#include "cmsis_os2.h" -#else -#include -#include -#endif - -#ifdef __cplusplus -extern "C" -{ -#endif - - -// ==== Enumerations, structures, defines ==== - -/// Priority values. -#if (osCMSIS < 0x20000U) -typedef enum { - osPriorityIdle = -3, ///< Priority: idle (lowest) - osPriorityLow = -2, ///< Priority: low - osPriorityBelowNormal = -1, ///< Priority: below normal - osPriorityNormal = 0, ///< Priority: normal (default) - osPriorityAboveNormal = +1, ///< Priority: above normal - osPriorityHigh = +2, ///< Priority: high - osPriorityRealtime = +3, ///< Priority: realtime (highest) - osPriorityError = 0x84, ///< System cannot determine priority or illegal priority. - osPriorityReserved = 0x7FFFFFFF ///< Prevents enum down-size compiler optimization. -} osPriority; -#else -#define osPriority osPriority_t -#endif - -/// Entry point of a thread. -typedef void (*os_pthread) (void const *argument); - -/// Entry point of a timer call back function. -typedef void (*os_ptimer) (void const *argument); - -/// Timer type. -#if (osCMSIS < 0x20000U) -typedef enum { - osTimerOnce = 0, ///< One-shot timer. - osTimerPeriodic = 1 ///< Repeating timer. -} os_timer_type; -#else -#define os_timer_type osTimerType_t -#endif - -/// Timeout value. -#define osWaitForever 0xFFFFFFFFU ///< Wait forever timeout value. - -/// Status code values returned by CMSIS-RTOS functions. -#if (osCMSIS < 0x20000U) -typedef enum { - osOK = 0, ///< Function completed; no error or event occurred. - osEventSignal = 0x08, ///< Function completed; signal event occurred. - osEventMessage = 0x10, ///< Function completed; message event occurred. - osEventMail = 0x20, ///< Function completed; mail event occurred. - osEventTimeout = 0x40, ///< Function completed; timeout occurred. - osErrorParameter = 0x80, ///< Parameter error: a mandatory parameter was missing or specified an incorrect object. - osErrorResource = 0x81, ///< Resource not available: a specified resource was not available. - osErrorTimeoutResource = 0xC1, ///< Resource not available within given time: a specified resource was not available within the timeout period. - osErrorISR = 0x82, ///< Not allowed in ISR context: the function cannot be called from interrupt service routines. - osErrorISRRecursive = 0x83, ///< Function called multiple times from ISR with same object. - osErrorPriority = 0x84, ///< System cannot determine priority or thread has illegal priority. - osErrorNoMemory = 0x85, ///< System is out of memory: it was impossible to allocate or reserve memory for the operation. - osErrorValue = 0x86, ///< Value of a parameter is out of range. - osErrorOS = 0xFF, ///< Unspecified RTOS error: run-time error but no other error message fits. - osStatusReserved = 0x7FFFFFFF ///< Prevents enum down-size compiler optimization. -} osStatus; -#else -typedef int32_t osStatus; -#define osEventSignal (0x08) -#define osEventMessage (0x10) -#define osEventMail (0x20) -#define osEventTimeout (0x40) -#define osErrorOS osError -#define osErrorTimeoutResource osErrorTimeout -#define osErrorISRRecursive (-126) -#define osErrorValue (-127) -#define osErrorPriority (-128) -#endif - - -// >>> the following data type definitions may be adapted towards a specific RTOS - -/// Thread ID identifies the thread. -/// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef void *osThreadId; -#else -#define osThreadId osThreadId_t -#endif - -/// Timer ID identifies the timer. -/// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef void *osTimerId; -#else -#define osTimerId osTimerId_t -#endif - -/// Mutex ID identifies the mutex. -/// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef void *osMutexId; -#else -#define osMutexId osMutexId_t -#endif - -/// Semaphore ID identifies the semaphore. -/// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef void *osSemaphoreId; -#else -#define osSemaphoreId osSemaphoreId_t -#endif - -/// Pool ID identifies the memory pool. -/// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS. -typedef void *osPoolId; - -/// Message ID identifies the message queue. -/// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS. -typedef void *osMessageQId; - -/// Mail ID identifies the mail queue. -/// \note CAN BE CHANGED: \b implementation specific in every CMSIS-RTOS. -typedef void *osMailQId; - - -/// Thread Definition structure contains startup information of a thread. -/// \note CAN BE CHANGED: \b os_thread_def is implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef struct os_thread_def { - os_pthread pthread; ///< start address of thread function - osPriority tpriority; ///< initial thread priority - uint32_t instances; ///< maximum number of instances of that thread function - uint32_t stacksize; ///< stack size requirements in bytes; 0 is default stack size -} osThreadDef_t; -#else -typedef struct os_thread_def { - os_pthread pthread; ///< start address of thread function - osThreadAttr_t attr; ///< thread attributes -} osThreadDef_t; -#endif - -/// Timer Definition structure contains timer parameters. -/// \note CAN BE CHANGED: \b os_timer_def is implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef struct os_timer_def { - os_ptimer ptimer; ///< start address of a timer function -} osTimerDef_t; -#else -typedef struct os_timer_def { - os_ptimer ptimer; ///< start address of a timer function - osTimerAttr_t attr; ///< timer attributes -} osTimerDef_t; -#endif - -/// Mutex Definition structure contains setup information for a mutex. -/// \note CAN BE CHANGED: \b os_mutex_def is implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef struct os_mutex_def { - uint32_t dummy; ///< dummy value -} osMutexDef_t; -#else -#define osMutexDef_t osMutexAttr_t -#endif - -/// Semaphore Definition structure contains setup information for a semaphore. -/// \note CAN BE CHANGED: \b os_semaphore_def is implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef struct os_semaphore_def { - uint32_t dummy; ///< dummy value -} osSemaphoreDef_t; -#else -#define osSemaphoreDef_t osSemaphoreAttr_t -#endif - -/// Definition structure for memory block allocation. -/// \note CAN BE CHANGED: \b os_pool_def is implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef struct os_pool_def { - uint32_t pool_sz; ///< number of items (elements) in the pool - uint32_t item_sz; ///< size of an item - void *pool; ///< pointer to memory for pool -} osPoolDef_t; -#else -typedef struct os_pool_def { - uint32_t pool_sz; ///< number of items (elements) in the pool - uint32_t item_sz; ///< size of an item - osMemoryPoolAttr_t attr; ///< memory pool attributes -} osPoolDef_t; -#endif - -/// Definition structure for message queue. -/// \note CAN BE CHANGED: \b os_messageQ_def is implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef struct os_messageQ_def { - uint32_t queue_sz; ///< number of elements in the queue - void *pool; ///< memory array for messages -} osMessageQDef_t; -#else -typedef struct os_messageQ_def { - uint32_t queue_sz; ///< number of elements in the queue - osMessageQueueAttr_t attr; ///< message queue attributes -} osMessageQDef_t; -#endif - -/// Definition structure for mail queue. -/// \note CAN BE CHANGED: \b os_mailQ_def is implementation specific in every CMSIS-RTOS. -#if (osCMSIS < 0x20000U) -typedef struct os_mailQ_def { - uint32_t queue_sz; ///< number of elements in the queue - uint32_t item_sz; ///< size of an item - void *pool; ///< memory array for mail -} osMailQDef_t; -#else -typedef struct os_mailQ_def { - uint32_t queue_sz; ///< number of elements in the queue - uint32_t item_sz; ///< size of an item - void *mail; ///< pointer to mail - osMemoryPoolAttr_t mp_attr; ///< memory pool attributes - osMessageQueueAttr_t mq_attr; ///< message queue attributes -} osMailQDef_t; -#endif - - -/// Event structure contains detailed information about an event. -typedef struct { - osStatus status; ///< status code: event or error information - union { - uint32_t v; ///< message as 32-bit value - void *p; ///< message or mail as void pointer - int32_t signals; ///< signal flags - } value; ///< event value - union { - osMailQId mail_id; ///< mail id obtained by \ref osMailCreate - osMessageQId message_id; ///< message id obtained by \ref osMessageCreate - } def; ///< event definition -} osEvent; - - -// ==== Kernel Management Functions ==== - -/// Initialize the RTOS Kernel for creating objects. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osKernelInitialize (void); -#endif - -/// Start the RTOS Kernel scheduler. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osKernelStart (void); -#endif - -/// Check if the RTOS kernel is already started. -/// \return 0 RTOS is not started, 1 RTOS is started. -#if (osCMSIS < 0x20000U) -int32_t osKernelRunning(void); -#endif - -#if (defined(osFeature_SysTick) && (osFeature_SysTick != 0)) // System Timer available - -/// Get the RTOS kernel system timer counter. -/// \return RTOS kernel system timer as 32-bit value -#if (osCMSIS < 0x20000U) -uint32_t osKernelSysTick (void); -#else -#define osKernelSysTick osKernelGetSysTimerCount -#endif - -/// The RTOS kernel system timer frequency in Hz. -/// \note Reflects the system timer setting and is typically defined in a configuration file. -#if (osCMSIS < 0x20000U) -#define osKernelSysTickFrequency 100000000 -#endif - -/// Convert a microseconds value to a RTOS kernel system timer value. -/// \param microsec time value in microseconds. -/// \return time value normalized to the \ref osKernelSysTickFrequency -#if (osCMSIS < 0x20000U) -#define osKernelSysTickMicroSec(microsec) (((uint64_t)microsec * (osKernelSysTickFrequency)) / 1000000) -#else -#define osKernelSysTickMicroSec(microsec) (((uint64_t)microsec * osKernelGetSysTimerFreq()) / 1000000) -#endif - -#endif // System Timer available - - -// ==== Thread Management Functions ==== - -/// Create a Thread Definition with function, priority, and stack requirements. -/// \param name name of the thread function. -/// \param priority initial priority of the thread function. -/// \param instances number of possible thread instances. -/// \param stacksz stack size (in bytes) requirements for the thread function. -/// \note CAN BE CHANGED: The parameters to \b osThreadDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osThreadDef(name, priority, instances, stacksz) \ -extern const osThreadDef_t os_thread_def_##name -#else // define the object -#if (osCMSIS < 0x20000U) -#define osThreadDef(name, priority, instances, stacksz) \ -const osThreadDef_t os_thread_def_##name = \ -{ (name), (priority), (instances), (stacksz) } -#else -#define osThreadDef(name, priority, instances, stacksz) \ -const osThreadDef_t os_thread_def_##name = \ -{ (name), \ - { NULL, osThreadDetached, NULL, 0U, NULL, 8*((stacksz+7)/8), (priority), 0U, 0U } } -#endif -#endif - -/// Access a Thread definition. -/// \param name name of the thread definition object. -/// \note CAN BE CHANGED: The parameter to \b osThread shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osThread(name) \ -&os_thread_def_##name - -/// Create a thread and add it to Active Threads and set it to state READY. -/// \param[in] thread_def thread definition referenced with \ref osThread. -/// \param[in] argument pointer that is passed to the thread function as start argument. -/// \return thread ID for reference by other functions or NULL in case of error. -osThreadId osThreadCreate (const osThreadDef_t *thread_def, void *argument); - -/// Return the thread ID of the current running thread. -/// \return thread ID for reference by other functions or NULL in case of error. -#if (osCMSIS < 0x20000U) -osThreadId osThreadGetId (void); -#endif - -/// Change priority of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \param[in] priority new priority value for the thread function. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osThreadSetPriority (osThreadId thread_id, osPriority priority); -#endif - -/// Get current priority of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \return current priority value of the specified thread. -#if (osCMSIS < 0x20000U) -osPriority osThreadGetPriority (osThreadId thread_id); -#endif - -/// Pass control to next thread that is in state \b READY. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osThreadYield (void); -#endif - -/// Terminate execution of a thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osThreadTerminate (osThreadId thread_id); -#endif - - -// ==== Signal Management ==== - -/// Set the specified Signal Flags of an active thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \param[in] signals specifies the signal flags of the thread that should be set. -/// \return previous signal flags of the specified thread or 0x80000000 in case of incorrect parameters. -int32_t osSignalSet (osThreadId thread_id, int32_t signals); - -/// Clear the specified Signal Flags of an active thread. -/// \param[in] thread_id thread ID obtained by \ref osThreadCreate or \ref osThreadGetId. -/// \param[in] signals specifies the signal flags of the thread that shall be cleared. -/// \return previous signal flags of the specified thread or 0x80000000 in case of incorrect parameters or call from ISR. -int32_t osSignalClear (osThreadId thread_id, int32_t signals); - -/// Wait for one or more Signal Flags to become signaled for the current \b RUNNING thread. -/// \param[in] signals wait until all specified signal flags set or 0 for any single signal flag. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return event flag information or error code. -osEvent osSignalWait (int32_t signals, uint32_t millisec); - - -// ==== Generic Wait Functions ==== - -/// Wait for Timeout (Time Delay). -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue "time delay" value -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osDelay (uint32_t millisec); -#endif - -#if (defined (osFeature_Wait) && (osFeature_Wait != 0)) // Generic Wait available - -/// Wait for Signal, Message, Mail, or Timeout. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out -/// \return event that contains signal, message, or mail information or error code. -osEvent osWait (uint32_t millisec); - -#endif // Generic Wait available - - -// ==== Timer Management Functions ==== - -/// Define a Timer object. -/// \param name name of the timer object. -/// \param function name of the timer call back function. -/// \note CAN BE CHANGED: The parameter to \b osTimerDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osTimerDef(name, function) \ -extern const osTimerDef_t os_timer_def_##name -#else // define the object -#if (osCMSIS < 0x20000U) -#define osTimerDef(name, function) \ -const osTimerDef_t os_timer_def_##name = { (function) } -#else -#define osTimerDef(name, function) \ -const osTimerDef_t os_timer_def_##name = \ -{ (function), { NULL, 0U, NULL, 0U } } -#endif -#endif - -/// Access a Timer definition. -/// \param name name of the timer object. -/// \note CAN BE CHANGED: The parameter to \b osTimer shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osTimer(name) \ -&os_timer_def_##name - -/// Create and Initialize a timer. -/// \param[in] timer_def timer object referenced with \ref osTimer. -/// \param[in] type osTimerOnce for one-shot or osTimerPeriodic for periodic behavior. -/// \param[in] argument argument to the timer call back function. -/// \return timer ID for reference by other functions or NULL in case of error. -osTimerId osTimerCreate (const osTimerDef_t *timer_def, os_timer_type type, void *argument); - -/// Start or restart a timer. -/// \param[in] timer_id timer ID obtained by \ref osTimerCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue "time delay" value of the timer. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osTimerStart (osTimerId timer_id, uint32_t millisec); -#endif - -/// Stop a timer. -/// \param[in] timer_id timer ID obtained by \ref osTimerCreate. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osTimerStop (osTimerId timer_id); -#endif - -/// Delete a timer. -/// \param[in] timer_id timer ID obtained by \ref osTimerCreate. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osTimerDelete (osTimerId timer_id); -#endif - - -// ==== Mutex Management Functions ==== - -/// Define a Mutex. -/// \param name name of the mutex object. -/// \note CAN BE CHANGED: The parameter to \b osMutexDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osMutexDef(name) \ -extern const osMutexDef_t os_mutex_def_##name -#else // define the object -#if (osCMSIS < 0x20000U) -#define osMutexDef(name) \ -const osMutexDef_t os_mutex_def_##name = { 0 } -#else -#define osMutexDef(name) \ -const osMutexDef_t os_mutex_def_##name = \ -{ NULL, osMutexRecursive | osMutexPrioInherit | osMutexRobust, NULL, 0U } -#endif -#endif - -/// Access a Mutex definition. -/// \param name name of the mutex object. -/// \note CAN BE CHANGED: The parameter to \b osMutex shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osMutex(name) \ -&os_mutex_def_##name - -/// Create and Initialize a Mutex object. -/// \param[in] mutex_def mutex definition referenced with \ref osMutex. -/// \return mutex ID for reference by other functions or NULL in case of error. -osMutexId osMutexCreate (const osMutexDef_t *mutex_def); - -/// Wait until a Mutex becomes available. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osMutexWait (osMutexId mutex_id, uint32_t millisec); -#else -#define osMutexWait osMutexAcquire -#endif - -/// Release a Mutex that was obtained by \ref osMutexWait. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexCreate. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osMutexRelease (osMutexId mutex_id); -#endif - -/// Delete a Mutex object. -/// \param[in] mutex_id mutex ID obtained by \ref osMutexCreate. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osMutexDelete (osMutexId mutex_id); -#endif - - -// ==== Semaphore Management Functions ==== - -#if (defined (osFeature_Semaphore) && (osFeature_Semaphore != 0U)) // Semaphore available - -/// Define a Semaphore object. -/// \param name name of the semaphore object. -/// \note CAN BE CHANGED: The parameter to \b osSemaphoreDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osSemaphoreDef(name) \ -extern const osSemaphoreDef_t os_semaphore_def_##name -#else // define the object -#if (osCMSIS < 0x20000U) -#define osSemaphoreDef(name) \ -const osSemaphoreDef_t os_semaphore_def_##name = { 0 } -#else -#define osSemaphoreDef(name) \ -const osSemaphoreDef_t os_semaphore_def_##name = \ -{ NULL, 0U, NULL, 0U } -#endif -#endif - -/// Access a Semaphore definition. -/// \param name name of the semaphore object. -/// \note CAN BE CHANGED: The parameter to \b osSemaphore shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osSemaphore(name) \ -&os_semaphore_def_##name - -/// Create and Initialize a Semaphore object. -/// \param[in] semaphore_def semaphore definition referenced with \ref osSemaphore. -/// \param[in] count maximum and initial number of available tokens. -/// \return semaphore ID for reference by other functions or NULL in case of error. -osSemaphoreId osSemaphoreCreate (const osSemaphoreDef_t *semaphore_def, int32_t count); - -/// Wait until a Semaphore token becomes available. -/// \param[in] semaphore_id semaphore object referenced with \ref osSemaphoreCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return number of available tokens, or -1 in case of incorrect parameters. -int32_t osSemaphoreWait (osSemaphoreId semaphore_id, uint32_t millisec); - -/// Release a Semaphore token. -/// \param[in] semaphore_id semaphore object referenced with \ref osSemaphoreCreate. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osSemaphoreRelease (osSemaphoreId semaphore_id); -#endif - -/// Delete a Semaphore object. -/// \param[in] semaphore_id semaphore object referenced with \ref osSemaphoreCreate. -/// \return status code that indicates the execution status of the function. -#if (osCMSIS < 0x20000U) -osStatus osSemaphoreDelete (osSemaphoreId semaphore_id); -#endif - -#endif // Semaphore available - - -// ==== Memory Pool Management Functions ==== - -#if (defined(osFeature_Pool) && (osFeature_Pool != 0)) // Memory Pool available - -/// \brief Define a Memory Pool. -/// \param name name of the memory pool. -/// \param no maximum number of blocks (objects) in the memory pool. -/// \param type data type of a single block (object). -/// \note CAN BE CHANGED: The parameter to \b osPoolDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osPoolDef(name, no, type) \ -extern const osPoolDef_t os_pool_def_##name -#else // define the object -#if (osCMSIS < 0x20000U) -#define osPoolDef(name, no, type) \ -const osPoolDef_t os_pool_def_##name = \ -{ (no), sizeof(type), NULL } -#else -#define osPoolDef(name, no, type) \ -const osPoolDef_t os_pool_def_##name = \ -{ (no), sizeof(type), { NULL, 0U, NULL, 0U, NULL, 0U } } -#endif -#endif - -/// \brief Access a Memory Pool definition. -/// \param name name of the memory pool -/// \note CAN BE CHANGED: The parameter to \b osPool shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osPool(name) \ -&os_pool_def_##name - -/// Create and Initialize a Memory Pool object. -/// \param[in] pool_def memory pool definition referenced with \ref osPool. -/// \return memory pool ID for reference by other functions or NULL in case of error. -osPoolId osPoolCreate (const osPoolDef_t *pool_def); - -/// Allocate a memory block from a Memory Pool. -/// \param[in] pool_id memory pool ID obtain referenced with \ref osPoolCreate. -/// \return address of the allocated memory block or NULL in case of no memory available. -void *osPoolAlloc (osPoolId pool_id); - -/// Allocate a memory block from a Memory Pool and set memory block to zero. -/// \param[in] pool_id memory pool ID obtain referenced with \ref osPoolCreate. -/// \return address of the allocated memory block or NULL in case of no memory available. -void *osPoolCAlloc (osPoolId pool_id); - -/// Return an allocated memory block back to a Memory Pool. -/// \param[in] pool_id memory pool ID obtain referenced with \ref osPoolCreate. -/// \param[in] block address of the allocated memory block to be returned to the memory pool. -/// \return status code that indicates the execution status of the function. -osStatus osPoolFree (osPoolId pool_id, void *block); - -#endif // Memory Pool available - - -// ==== Message Queue Management Functions ==== - -#if (defined(osFeature_MessageQ) && (osFeature_MessageQ != 0)) // Message Queue available - -/// \brief Create a Message Queue Definition. -/// \param name name of the queue. -/// \param queue_sz maximum number of messages in the queue. -/// \param type data type of a single message element (for debugger). -/// \note CAN BE CHANGED: The parameter to \b osMessageQDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osMessageQDef(name, queue_sz, type) \ -extern const osMessageQDef_t os_messageQ_def_##name -#else // define the object -#if (osCMSIS < 0x20000U) -#define osMessageQDef(name, queue_sz, type) \ -const osMessageQDef_t os_messageQ_def_##name = \ -{ (queue_sz), NULL } -#else -#define osMessageQDef(name, queue_sz, type) \ -const osMessageQDef_t os_messageQ_def_##name = \ -{ (queue_sz), { NULL, 0U, NULL, 0U, NULL, 0U } } -#endif -#endif - -/// \brief Access a Message Queue Definition. -/// \param name name of the queue -/// \note CAN BE CHANGED: The parameter to \b osMessageQ shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osMessageQ(name) \ -&os_messageQ_def_##name - -/// Create and Initialize a Message Queue object. -/// \param[in] queue_def message queue definition referenced with \ref osMessageQ. -/// \param[in] thread_id thread ID (obtained by \ref osThreadCreate or \ref osThreadGetId) or NULL. -/// \return message queue ID for reference by other functions or NULL in case of error. -osMessageQId osMessageCreate (const osMessageQDef_t *queue_def, osThreadId thread_id); - -/// Put a Message to a Queue. -/// \param[in] queue_id message queue ID obtained with \ref osMessageCreate. -/// \param[in] info message information. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return status code that indicates the execution status of the function. -osStatus osMessagePut (osMessageQId queue_id, uint32_t info, uint32_t millisec); - -/// Get a Message from a Queue or timeout if Queue is empty. -/// \param[in] queue_id message queue ID obtained with \ref osMessageCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return event information that includes status code. -osEvent osMessageGet (osMessageQId queue_id, uint32_t millisec); - -#endif // Message Queue available - - -// ==== Mail Queue Management Functions ==== - -#if (defined(osFeature_MailQ) && (osFeature_MailQ != 0)) // Mail Queue available - -/// \brief Create a Mail Queue Definition. -/// \param name name of the queue. -/// \param queue_sz maximum number of mails in the queue. -/// \param type data type of a single mail element. -/// \note CAN BE CHANGED: The parameter to \b osMailQDef shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#if defined (osObjectsExternal) // object is external -#define osMailQDef(name, queue_sz, type) \ -extern const osMailQDef_t os_mailQ_def_##name -#else // define the object -#if (osCMSIS < 0x20000U) -#define osMailQDef(name, queue_sz, type) \ -const osMailQDef_t os_mailQ_def_##name = \ -{ (queue_sz), sizeof(type), NULL } -#else -#define osMailQDef(name, queue_sz, type) \ -static void *os_mail_p_##name[2]; \ -const osMailQDef_t os_mailQ_def_##name = \ -{ (queue_sz), sizeof(type), (&os_mail_p_##name), \ - { NULL, 0U, NULL, 0U, NULL, 0U }, \ - { NULL, 0U, NULL, 0U, NULL, 0U } } -#endif -#endif - -/// \brief Access a Mail Queue Definition. -/// \param name name of the queue -/// \note CAN BE CHANGED: The parameter to \b osMailQ shall be consistent but the -/// macro body is implementation specific in every CMSIS-RTOS. -#define osMailQ(name) \ -&os_mailQ_def_##name - -/// Create and Initialize a Mail Queue object. -/// \param[in] queue_def mail queue definition referenced with \ref osMailQ. -/// \param[in] thread_id thread ID (obtained by \ref osThreadCreate or \ref osThreadGetId) or NULL. -/// \return mail queue ID for reference by other functions or NULL in case of error. -osMailQId osMailCreate (const osMailQDef_t *queue_def, osThreadId thread_id); - -/// Allocate a memory block for mail from a mail memory pool. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out -/// \return pointer to memory block that can be filled with mail or NULL in case of error. -void *osMailAlloc (osMailQId queue_id, uint32_t millisec); - -/// Allocate a memory block for mail from a mail memory pool and set memory block to zero. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out -/// \return pointer to memory block that can be filled with mail or NULL in case of error. -void *osMailCAlloc (osMailQId queue_id, uint32_t millisec); - -/// Put a Mail into a Queue. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] mail pointer to memory with mail to put into a queue. -/// \return status code that indicates the execution status of the function. -osStatus osMailPut (osMailQId queue_id, const void *mail); - -/// Get a Mail from a Queue or timeout if Queue is empty. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] millisec \ref CMSIS_RTOS_TimeOutValue or 0 in case of no time-out. -/// \return event information that includes status code. -osEvent osMailGet (osMailQId queue_id, uint32_t millisec); - -/// Free a memory block by returning it to a mail memory pool. -/// \param[in] queue_id mail queue ID obtained with \ref osMailCreate. -/// \param[in] mail pointer to memory block that was obtained with \ref osMailGet. -/// \return status code that indicates the execution status of the function. -osStatus osMailFree (osMailQId queue_id, void *mail); - -#endif // Mail Queue available - - -#ifdef __cplusplus -} -#endif - -#endif // CMSIS_OS_H_ diff --git a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Template/cmsis_os1.c b/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Template/cmsis_os1.c deleted file mode 100644 index de1650c..0000000 --- a/MATLAB/MCU_STM32_Matlab/Drivers/CMSIS/RTOS2/Template/cmsis_os1.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Copyright (c) 2013-2017 ARM Limited. All rights reserved. - * - * SPDX-License-Identifier: Apache-2.0 - * - * Licensed under the Apache License, Version 2.0 (the License); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an AS IS BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - * ---------------------------------------------------------------------- - * - * $Date: 10. January 2017 - * $Revision: V1.2 - * - * Project: CMSIS-RTOS API V1 - * Title: cmsis_os_v1.c V1 module file - *---------------------------------------------------------------------------*/ - -#include -#include "cmsis_os.h" - -#if (osCMSIS >= 0x20000U) - - -// Thread -osThreadId osThreadCreate (const osThreadDef_t *thread_def, void *argument) { - - if (thread_def == NULL) { - return (osThreadId)NULL; - } - return osThreadNew((osThreadFunc_t)thread_def->pthread, argument, &thread_def->attr); -} - - -// Signals - -#define SignalMask ((1U< 0U) && (flags < 0x80000000U)) { - event.status = osEventSignal; - event.value.signals = (int32_t)flags; - } else { - switch ((int32_t)flags) { - case osErrorResource: - event.status = osOK; - break; - case osErrorTimeout: - event.status = osEventTimeout; - break; - case osErrorParameter: - event.status = osErrorValue; - break; - default: - event.status = (osStatus)flags; - break; - } - } - return event; -} - - -// Timer -osTimerId osTimerCreate (const osTimerDef_t *timer_def, os_timer_type type, void *argument) { - - if (timer_def == NULL) { - return (osTimerId)NULL; - } - return osTimerNew((osTimerFunc_t)timer_def->ptimer, type, argument, &timer_def->attr); -} - - -// Mutex -osMutexId osMutexCreate (const osMutexDef_t *mutex_def) { - - if (mutex_def == NULL) { - return (osMutexId)NULL; - } - return osMutexNew(mutex_def); -} - - -// Semaphore - -#if (defined (osFeature_Semaphore) && (osFeature_Semaphore != 0U)) - -osSemaphoreId osSemaphoreCreate (const osSemaphoreDef_t *semaphore_def, int32_t count) { - - if (semaphore_def == NULL) { - return (osSemaphoreId)NULL; - } - return osSemaphoreNew((uint32_t)count, (uint32_t)count, semaphore_def); -} - -int32_t osSemaphoreWait (osSemaphoreId semaphore_id, uint32_t millisec) { - osStatus_t status; - uint32_t count; - - status = osSemaphoreAcquire(semaphore_id, millisec); - switch (status) { - case osOK: - count = osSemaphoreGetCount(semaphore_id); - return ((int32_t)count + 1); - case osErrorResource: - case osErrorTimeout: - return 0; - default: - break; - } - return -1; -} - -#endif // Semaphore - - -// Memory Pool - -#if (defined(osFeature_Pool) && (osFeature_Pool != 0)) - -osPoolId osPoolCreate (const osPoolDef_t *pool_def) { - - if (pool_def == NULL) { - return (osPoolId)NULL; - } - return ((osPoolId)(osMemoryPoolNew(pool_def->pool_sz, pool_def->item_sz, &pool_def->attr))); -} - -void *osPoolAlloc (osPoolId pool_id) { - return osMemoryPoolAlloc((osMemoryPoolId_t)pool_id, 0U); -} - -void *osPoolCAlloc (osPoolId pool_id) { - void *block; - uint32_t block_size; - - block_size = osMemoryPoolGetBlockSize((osMemoryPoolId_t)pool_id); - if (block_size == 0U) { - return NULL; - } - block = osMemoryPoolAlloc((osMemoryPoolId_t)pool_id, 0U); - if (block != NULL) { - memset(block, 0, block_size); - } - return block; -} - -osStatus osPoolFree (osPoolId pool_id, void *block) { - return osMemoryPoolFree((osMemoryPoolId_t)pool_id, block); -} - -#endif // Memory Pool - - -// Message Queue - -#if (defined(osFeature_MessageQ) && (osFeature_MessageQ != 0)) - -osMessageQId osMessageCreate (const osMessageQDef_t *queue_def, osThreadId thread_id) { - (void)thread_id; - - if (queue_def == NULL) { - return (osMessageQId)NULL; - } - return ((osMessageQId)(osMessageQueueNew(queue_def->queue_sz, sizeof(uint32_t), &queue_def->attr))); -} - -osStatus osMessagePut (osMessageQId queue_id, uint32_t info, uint32_t millisec) { - return osMessageQueuePut((osMessageQueueId_t)queue_id, &info, 0U, millisec); -} - -osEvent osMessageGet (osMessageQId queue_id, uint32_t millisec) { - osStatus_t status; - osEvent event; - uint32_t message; - - status = osMessageQueueGet((osMessageQueueId_t)queue_id, &message, NULL, millisec); - switch (status) { - case osOK: - event.status = osEventMessage; - event.value.v = message; - break; - case osErrorResource: - event.status = osOK; - break; - case osErrorTimeout: - event.status = osEventTimeout; - break; - default: - event.status = status; - break; - } - return event; -} - -#endif // Message Queue - - -// Mail Queue - -#if (defined(osFeature_MailQ) && (osFeature_MailQ != 0)) - -typedef struct os_mail_queue_s { - osMemoryPoolId_t mp_id; - osMessageQueueId_t mq_id; -} os_mail_queue_t; - -osMailQId osMailCreate (const osMailQDef_t *queue_def, osThreadId thread_id) { - os_mail_queue_t *ptr; - (void)thread_id; - - if (queue_def == NULL) { - return (osMailQId)NULL; - } - - ptr = queue_def->mail; - if (ptr == NULL) { - return (osMailQId)NULL; - } - - ptr->mp_id = osMemoryPoolNew (queue_def->queue_sz, queue_def->item_sz, &queue_def->mp_attr); - ptr->mq_id = osMessageQueueNew(queue_def->queue_sz, sizeof(void *), &queue_def->mq_attr); - if ((ptr->mp_id == (osMemoryPoolId_t)NULL) || (ptr->mq_id == (osMessageQueueId_t)NULL)) { - if (ptr->mp_id != (osMemoryPoolId_t)NULL) { - osMemoryPoolDelete(ptr->mp_id); - } - if (ptr->mq_id != (osMessageQueueId_t)NULL) { - osMessageQueueDelete(ptr->mq_id); - } - return (osMailQId)NULL; - } - - return (osMailQId)ptr; -} - -void *osMailAlloc (osMailQId queue_id, uint32_t millisec) { - os_mail_queue_t *ptr = (os_mail_queue_t *)queue_id; - - if (ptr == NULL) { - return NULL; - } - return osMemoryPoolAlloc(ptr->mp_id, millisec); -} - -void *osMailCAlloc (osMailQId queue_id, uint32_t millisec) { - os_mail_queue_t *ptr = (os_mail_queue_t *)queue_id; - void *block; - uint32_t block_size; - - if (ptr == NULL) { - return NULL; - } - block_size = osMemoryPoolGetBlockSize(ptr->mp_id); - if (block_size == 0U) { - return NULL; - } - block = osMemoryPoolAlloc(ptr->mp_id, millisec); - if (block != NULL) { - memset(block, 0, block_size); - } - - return block; - -} - -osStatus osMailPut (osMailQId queue_id, const void *mail) { - os_mail_queue_t *ptr = (os_mail_queue_t *)queue_id; - - if (ptr == NULL) { - return osErrorParameter; - } - if (mail == NULL) { - return osErrorValue; - } - return osMessageQueuePut(ptr->mq_id, &mail, 0U, 0U); -} - -osEvent osMailGet (osMailQId queue_id, uint32_t millisec) { - os_mail_queue_t *ptr = (os_mail_queue_t *)queue_id; - osStatus_t status; - osEvent event; - void *mail; - - if (ptr == NULL) { - event.status = osErrorParameter; - return event; - } - - status = osMessageQueueGet(ptr->mq_id, &mail, NULL, millisec); - switch (status) { - case osOK: - event.status = osEventMail; - event.value.p = mail; - break; - case osErrorResource: - event.status = osOK; - break; - case osErrorTimeout: - event.status = osEventTimeout; - break; - default: - event.status = status; - break; - } - return event; -} - -osStatus osMailFree (osMailQId queue_id, void *mail) { - os_mail_queue_t *ptr = (os_mail_queue_t *)queue_id; - - if (ptr == NULL) { - return osErrorParameter; - } - if (mail == NULL) { - return osErrorValue; - } - return osMemoryPoolFree(ptr->mp_id, mail); -} - -#endif // Mail Queue - - -#endif // osCMSIS diff --git a/MATLAB/MCU_STM32_Matlab/stm32f4xx_matlab_conf.json b/MATLAB/MCU_STM32_Matlab/stm32f4xx_matlab_conf.json index 596d63e..b1970f0 100644 --- a/MATLAB/MCU_STM32_Matlab/stm32f4xx_matlab_conf.json +++ b/MATLAB/MCU_STM32_Matlab/stm32f4xx_matlab_conf.json @@ -38,6 +38,64 @@ "Default": true, "NewRow": true }, + "CMSIS_DSP": { + "Prompt": "Enable CMSIS DSP Module", + "Def": [ + "ARM_MATH_LOOPUNROLL", + "__GNUC_PYTHON__" + ], + "Includes": [ + "Drivers/CMSIS/DSP/Include", + "Drivers/CMSIS/DSP/PrivateInclude", + "Drivers/CMSIS/DSP/Source/BasicMathFunctions", + "Drivers/CMSIS/DSP/Source/BayesFunctions", + "Drivers/CMSIS/DSP/Source/CommonTables", + "Drivers/CMSIS/DSP/Source/ComplexMathFunctions", + "Drivers/CMSIS/DSP/Source/ControllerFunctions", + "Drivers/CMSIS/DSP/Source/DistanceFunctions", + "Drivers/CMSIS/DSP/Source/FastMathFunctions", + "Drivers/CMSIS/DSP/Source/FilteringFunctions", + "Drivers/CMSIS/DSP/Source/InterpolationFunctions", + "Drivers/CMSIS/DSP/Source/MatrixFunctions", + "Drivers/CMSIS/DSP/Source/QuaternionMathFunctions", + "Drivers/CMSIS/DSP/Source/StatisticsFunctions", + "Drivers/CMSIS/DSP/Source/SupportFunctions", + "Drivers/CMSIS/DSP/Source/SVMFunctions", + "Drivers/CMSIS/DSP/Source/TransformFunctions" + ], + "Sources": [ + "Drivers/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctions.c", + "Drivers/CMSIS/DSP/Source/BasicMathFunctions/BasicMathFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/BayesFunctions/BayesFunctions.c", + "Drivers/CMSIS/DSP/Source/BayesFunctions/BayesFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/CommonTables/CommonTables.c", + "Drivers/CMSIS/DSP/Source/CommonTables/CommonTablesF16.c", + "Drivers/CMSIS/DSP/Source/ComplexMathFunctions/ComplexMathFunctions.c", + "Drivers/CMSIS/DSP/Source/ComplexMathFunctions/ComplexMathFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/ControllerFunctions/ControllerFunctions.c", + "Drivers/CMSIS/DSP/Source/DistanceFunctions/DistanceFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctions.c", + "Drivers/CMSIS/DSP/Source/FastMathFunctions/FastMathFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/FilteringFunctions/FilteringFunctions.c", + "Drivers/CMSIS/DSP/Source/FilteringFunctions/FilteringFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/InterpolationFunctions/InterpolationFunctions.c", + "Drivers/CMSIS/DSP/Source/InterpolationFunctions/InterpolationFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/MatrixFunctions/MatrixFunctions.c", + "Drivers/CMSIS/DSP/Source/MatrixFunctions/MatrixFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/QuaternionMathFunctions/QuaternionMathFunctions.c", + "Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctions.c", + "Drivers/CMSIS/DSP/Source/StatisticsFunctions/StatisticsFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/SupportFunctions/SupportFunctions.c", + "Drivers/CMSIS/DSP/Source/SupportFunctions/SupportFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/SVMFunctions/SVMFunctions.c", + "Drivers/CMSIS/DSP/Source/SVMFunctions/SVMFunctionsF16.c", + "Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctions.c", + "Drivers/CMSIS/DSP/Source/TransformFunctions/TransformFunctionsF16.c" + ], + "Type": "checkbox", + "Default": false, + "NewRow": false + }, "STM32xx": { "Prompt": "Choose MCU", "Def": [ @@ -58,7 +116,7 @@ "Prompt": "Enable ADCs", "Def": "ADC_ENABLE", "Type": "checkbox", - "Default": false, + "Default": true, "NewRow": true } } @@ -72,7 +130,7 @@ "Drivers/STM32F4xx_HAL_Driver/Src/stm32f4xx_hal.c" ], "Type": "checkbox", - "Default": false, + "Default": true, "NewRow": true }, "HAL_ADC": { @@ -460,7 +518,7 @@ "Prompt": "TIM5 Enable", "Def": "USE_TIM5", "Type": "checkbox", - "Default": false, + "Default": true, "NewRow": true }, "TIM3_Handler": { diff --git a/MATLAB/MCU_Wrapper/run_mex.bat b/MATLAB/MCU_Wrapper/run_mex.bat index c30c29f..add764a 100644 --- a/MATLAB/MCU_Wrapper/run_mex.bat +++ b/MATLAB/MCU_Wrapper/run_mex.bat @@ -54,6 +54,33 @@ set code_PERIPH=.\MCU_STM32_Matlab\stm32_matlab_conf.c^ .\MCU_STM32_Matlab\Drivers\STM32_SIMULINK\stm32_matlab_gpio.c^ .\MCU_STM32_Matlab\Drivers\STM32_SIMULINK\stm32_matlab_dma.c^ .\MCU_STM32_Matlab\Drivers\STM32_SIMULINK\stm32_periph_registers.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\BasicMathFunctions\BasicMathFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\BasicMathFunctions\BasicMathFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\BayesFunctions\BayesFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\BayesFunctions\BayesFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\CommonTables\CommonTables.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\CommonTables\CommonTablesF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\ComplexMathFunctions\ComplexMathFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\ComplexMathFunctions\ComplexMathFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\ControllerFunctions\ControllerFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\DistanceFunctions\DistanceFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\FastMathFunctions\FastMathFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\FastMathFunctions\FastMathFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\FilteringFunctions\FilteringFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\FilteringFunctions\FilteringFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\InterpolationFunctions\InterpolationFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\InterpolationFunctions\InterpolationFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\MatrixFunctions\MatrixFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\MatrixFunctions\MatrixFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\QuaternionMathFunctions\QuaternionMathFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\StatisticsFunctions\StatisticsFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\StatisticsFunctions\StatisticsFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\SupportFunctions\SupportFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\SupportFunctions\SupportFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\SVMFunctions\SVMFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\SVMFunctions\SVMFunctionsF16.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\TransformFunctions\TransformFunctions.c^ + .\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\TransformFunctions\TransformFunctionsF16.c^ .\MCU_STM32_Matlab\Drivers\STM32F4xx_HAL_Driver\Src\stm32f4xx_hal.c^ .\MCU_STM32_Matlab\Drivers\STM32F4xx_HAL_Driver\Src\stm32f4xx_hal_adc.c^ .\MCU_STM32_Matlab\Drivers\STM32F4xx_HAL_Driver\Src\stm32f4xx_hal_adc_ex.c^ @@ -75,7 +102,24 @@ set includes_PERIPH=-I".\MCU_STM32_Matlab\."^ -I".\MCU_STM32_Matlab\Drivers\CMSIS"^ -I".\MCU_STM32_Matlab\Drivers\CMSIS\Device\STM32F4xx"^ -I".\MCU_STM32_Matlab\Drivers\STM32F4xx_HAL_Driver\Inc"^ - -I".\MCU_STM32_Matlab\Drivers\STM32F4xx_HAL_Driver\Inc\Legacy" + -I".\MCU_STM32_Matlab\Drivers\STM32F4xx_HAL_Driver\Inc\Legacy"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Include"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\PrivateInclude"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\BasicMathFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\BayesFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\CommonTables"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\ComplexMathFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\ControllerFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\DistanceFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\FastMathFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\FilteringFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\InterpolationFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\MatrixFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\QuaternionMathFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\StatisticsFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\SupportFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\SVMFunctions"^ + -I".\MCU_STM32_Matlab\Drivers\CMSIS\DSP\Source\TransformFunctions" :: PERIPH BAT END ::------------------------------------------------------------------------- diff --git a/MATLAB/upp_r2023.slx b/MATLAB/upp_r2023.slx index 78fa324fcb37e7ae01c370bce72be591775f1966..9b886b6f9ea7d7ab3731af359b9e5477395669a5 100644 GIT binary patch delta 41734 zcmV)8K*qn3-2=em0~b(B0|YGq000O8001EX_T6MbH9G(R!;u#F!2S0qGKuOLsR2NM4CccU-!pYYy+4`8MBXX07?~t_4fv z95{PF`+5G)@4wF`;)9YL4i*I#2n52Be=GeF1VZ`u_=y1qzB4$r6bbx;>F`$52?To5 z@c4=H^Ttd81bPmVf0usq**$%4!NXZ+GJxayFg>;S^Q$7L!jz3>rmfA)^fIO~mtjp1 zjwJ?;$4|n9kzt06IVfg^lAWDhLi6A4Nltn5hV2$6DXGQ41AeEkDA=ngqt(YOqp7N| zCPmU$?o?n^CW&SP3H9wuT6%A{(uHaVId+Og6`IO+pVQOle^RE3z@I*T0XNPj#K*_< z4^WbpW(0wS@bTk8WJqag=_dyNz3|@)|Gn_v3;#zKz7pz!QmJlz#kI8On3~9-{x3hh zGpN)6foPPOg@lEf8>FG|ud;-H{uD?g{EXnhQhbt7wtFp28pyPWXurqNX>h=tnXyAQ zjo^jL7c@3vf4VI;VvUO3AVB_Mw7wA6W>ltcm{n|23BSuL*{^)ZptU#VZD> zIdlZWXz!4IhKntYgt|njXc|(?APFc%pZ@(uvLR230T-EJwI+VFtYP&GK5kfCd_1Q7 z6Yn;TX&YChP+ZD(V8KfawA(Q#HEtN;ix>HXcOQdme~kOo04Z%pRE~OYp0cp8{O*l1 zH{{p;preD8H5%n07~IuG5G?t|#6%DUm?8R}tFV?uOCc5{1JaRb;FO4 zW(MLXf6q1c@&!XTW;r6LgiRn9&N4fvjUU@LHa{O88EMsHq(Vfj*f~?poWt_P#LTSQ zs4}~@783@ejMXLNQ!6&q0WH#nCjU&Xr^&Q#7Ka z#AbbSGcPZX`0DB@5^0fvOjec_1;*NI!ehMsH?e3bEv-DeqT;7A;q6y)o5eJUm6aI~ z0^|MJFi-LLXFB_&u)-^gogt3POH1YVtzN+mGre>6H` z0_pgJ*Zj@S?9R?CCfiG#-~c7Y214eT=Tx}oUW+#T#+yUhU@BJlyf;a)8aqwws^X7} z#km2A_TRN@W`fomzq=%u33Rxozw%ipU3iH-!&n~m^F`0kuf9gpQBWYPs$Sqq8M3gq zmxEHsn3*xn+qqdyHc=)Hn4}`fjb zU+IY;6!g6n{H)sB{&3UYZB=V@ckOk2+}C`3y(!x7$6&_Y>W zQ4ywDHMyy&sYZpt8$V_FiLe5hBk_l=B^(MQ4Ay19wS2U)z(r6a6QQw$`hMpq}Mh}sgyg43;0hPRTWX=+N`BjzSD^> z4L>fTVA^nigW$xWhbBJ*1H-9MQaDp_Zmvyi^q0Ff6eXZ40aq56u+~)Jqi==0kk=M zV=&qHY-cjgl)Qea4ULpjugmWaq0PebE3nsS-=+Qfdf5G90dXwgcj^XI2Cd&M+<$aw zcJ?cfH07)1_dD%|ch{45{EeUVAC(+&>~ncKsDJ=aGwbW?*~P5kAKuFN`r@x1U;2-- z%kw}&<2mU(kz-<)-ZVoQ%@`c&Q~rp9rFP1YkPyf}d_cwIo z)78~=-zh3Qf13EDoF8+z*h*SjD%P;@>8PJ!^k?fqkLDw3sx|j z)br);80tcYlXZMhdNdc+|C&pN9%VzYB;EABt3_(4+tMAVYOhNQPjqUR`}J!N_kZxh zu|;1Lx1|NIY&h-1ipzY2y~qddB8nT-36g<>fBNG#dX60g(SlZX`l8`KFx=!5 z!fMq05pw~?eJvVQCgk%60YOcrrKV#yXIrG^0WrG`l$MRC?nVTPA)M?PQM~3YQf*?K z_OIS4px*~hI?24&MO3;S#-Nnj+uHuFZ@=Ta-O%?n>_~dC6ZI%B?(^XSi@#`>TL2xh zR}j`He=0BogR)!iB(u&6Ty(?O6c+Re68ab|wCI2FDTd`r(+z@u43oS~K$D5+wt&7t z5dVUK^WgrXUgz0mc9^(S)6pKMJ0M09esaZVJ;WJ)X)N^z(2@(4J#o5;aq}zI!sy+* z7xG=VSE!sHCgAW$^UNzDE9=5oYfb4M3Ff8&f1DSW&bo(u!|WdK5oZTtO10|YJ57(V z$>To8|5j2&36TXJF3*#5^NfU}rS>MsS7k>?M&+aU1>4$Kpq8@s;!4Yb#-OHPP%_vf zGyb$0W;oK=sAhQT+4_O##aSj&`OO>dOyBF`2FHyjdVZ~KOS_SgOe*8Q89M>-Odt5U zf6Od?g}>;zTUxW_#osuZQvD0j_^B|40qD*WeF3QG20SSrw6w6=mtxDxIMz3R8}Ms; zPBt*a109J00!D=@1~3l1H7hKzYz_1j4AokjI_o+8$~4B_rf`WUFQHI64mIt5_=Goa z63}MQ`L5;U-CbNc=gXIJ^75}ovcyr#e-TkY$9PT>MbBCOY%r;mraf@61JGiD0N`;V z54Wf8Dc6oP;on`JXCz6p>)EYl9W?zcVmw1Otbf7XsQ z%B?L=92%=u9C3VNy#D4$dBrEI2fK@a(}rUrBX_>AWRU%5M7DEv`QRSg#TIh$+ZDXy z$5L8xXz-eyRWmV7JSQho|M)SqFzfymfD+-UsT30v6F*1xXKP8(are55J~7Lx)|oVV z;!Zc$F&&;>EjzwO0aa>1ZTE$Nf5{8a;WjV)Ee@r@4FmKVY`e410rB63;>CupccRl$ z|Lu@B$|hVu+ncT_YD2_tk0<|(kxvMIihb~&(mg+)7*xh@eDOU?AEd~e*Wmsxs}D!U z(2!%ujId0{G5}`P`b4xb)9o)XhG!7he5-{j8<2yoqP|q_LyKEhJKLb3f5S59tE@3B z< z)IcA3=65|#tD>Xx<2-#hNrP;&6FrM!W#I;whnEbZQDgCPbA)<^i5*4}+-V@8n)oCS zK3}YssWZ#_`a}=GM4$A8dzq1SSEKIgs~!CdiWO}58~`}S(IyL}6uhi(8(1tF(fe}yXzq898PNr8(@ znhV+2=F{xCpO%rae^5JN?)RPvQ%oOCHt#7g&)tUIvYRIBZO4lV{!;ZUPse|rCmJk7 zdy_DLjpO1CLpq;dkZ;^~p!&F73i&kQe=0oBq)^RK3Nv05jZ;DZ*!9%M^*g0aHZ@!t zSX9iPrKLD<3+UO>f6h)1C>J1d-piBZb{=F^8seQ@*7dNxFZ$$Uulvn@{fe^?-5`(a z!F&u1ru02YZTa>J94zVkh^#+NLaix4Ml2D!)30j~-)4y?c4n8Ure?MyU{U$b^_h4KN)8Uf%~J`3J+X76nLqGO7xluLoE*=#q+laaxOgsrQ>nqo zmp#1F??1rEK!X6r-YnOme|MB*N-UcP-SI#+EjEYJMD==3j#1;He^w1y@O5!%)hM}0 z{_&PEe-Kan_E~;@K1NtKMu;Q}CubOtniYAN8D0X$gZAwx7wYXXubp01db+F*PDOFH zg)iN13S4dqv_~lW?0j-RXar1=JCNw+=y$>-y*W~}QseZfdTS2rMzb|DJ~6S*i5aYqP(tdXb=nOrBbRe_Lt)S~CifWBKLBu-Y$Js0JH2sMCLE zrwAiViYtvhyw7;drL?(dHqu>Wk-CEC2@37}!4vmZTfk-cSRo^Gqvdf#$&uSh*{yX< znFm0}dWIVY9~<-kz!F?l#j`h8j|cD%moSgv(5v2($=Kzx=7aA0yR%j`m7CM7hp+a& zf2V1W)p|6je@{TkZ_`j@g@jRb`dpM!9Hk=kij}=Y7QTKRFluQx=NgMZQ_<866+UQa z3V7$@!UxE2+O;kstli;qu~qn4M6h8ca=O`#6AKH=6DRWYs%5(Ilz}YTjEFQjyTRR} zI7>c(JU$$(io?J^=|rHV|w9e|@5>p1n))d2(#wdxnLtI?+=lbAI}Gt`aqL z(92;-J}KRocFQ~k*!wH}{rw$5cw!iuNuT&`*GCKsTEh&=)1yMFx9Ei_H7Zx%+#biK z9i@zxDD0b^-yQnVErcyZf=DcfB7HC zk*y*g*2q06=3M*~UUQ}LB^O;vcy@!pk~gvb{fWAYT2h$!-L^9Dcz?FR%gZ(0>AHR6 z9wU4J$Ytc@gp)X^^=MlMzW*T2oX#lalus~#QaG$gqMB&R`rI`;2vVUgR_Ck`!HTiv zh>$B~u9u>ECddesP?}iuPlZa0glD>+8!*9f=5Hi~huh57 z6d?{181cM5%nzkF9q(29#V#~2$>}b-N7{{)ge@lC(ma^ylT$m&ssP#e>FGLcsh98KYGJV55cBhia-RQL8 zznIlvA&H_Ri10zxz&{pR3Kg&cP-Q56R6|+OlXRf6N*ly(9}OE2Yc$ zP4v#yR#xBXsQA+Ev;t=<+H-tIbUd2h8-p3{*W2ynlWO&%#6(0ZaJj6mMAkxC@7Q&= zu%FK3pnX?iXI*7Q(f31b;Zox@VmDZfOLCPO5T!z1;$?}|s1O(oP?`Qef8N!;LN);q zlBxB#!F=X7;p3Ruf7wYteEKwbcoiDz#L#vz22NSf@H|+{G#UI8G?;{bXEH8`ezk5x zpyVjzgnsMz%aY<1tGU7U-zML3@`yiwdh}58F#lQKjEz51mLLxg&$HOR9F})5CR={} z{e!oQ-8TG%F@0-a`m54~%%K!ObECqQ1860x_lc&G&ASOVf44V}ne*&@mRKnwyFudl znDzbX+oYbwx_wl45!?R3!LCZQQCZjg|uhre`~I)H(%=KtH?&hZZ4)9op?8g z6~3Nl^4n7Q7t?+hRh)wNcAKWOVfFPL-}@k<^PE4c?w@AvaHf~<=u#fOUM}4t4X~CQ zW4;;fBeh0KL(w|3g9VoM{PG(#Y%Xi@qI$7FfJX#KKL9`3mgmG>vVX7|&)3i|R=k=v zp%Y^%fB##>7E!lBr4&C!yehZ(5xTM?t8w5Rz}9b zM-@sp7d+o0xLT7}Bf13OT;ez%A1@M_fL`2Kx;y2{{cJmnD z2GHu~q8FMkR{G;bfAHHL{17?xI&2NupA+vu#S;-+^>UyQ!iR$hPJeM(NQ6x>aR7K9>5MgBpR7{KT0dVA*2sxl%pQf6i(@0keAMrpiB>*2r1WwkF$ ze>N2SHxxXn0bpB*HhH+eyX@uk4Jgd=r6_N^eszCL`+$#4!q$=8_U{FoUd!Vuv1w_| zs(k$Ty8n-@)RwcEQD@)_pv4-IjCYF8f80?#x%%QqdO1rlz*-e;;vGcCJKs+5ZtpsmfN6TRI$uLrEj2u8!um z;NoY+e#Pl~Ts#KFIc#et#3o(jGb`>vke`V6<=ID|+&m?o6i`_b4-`;bqudjFR?n?s`70GlZ zgZ$PD7!h-(Z(31RSI;f8qe)jXpFI`7?8G~Yw)aNui}48*^{N9d5Z{zqe!Tjlo=JPu z(l?Hq2cI@3?P$}GVy36N9c;hFj-e5a1iGzclIg_Ml+oSwnMSiKyYI=je=HElY&bPw z&xTab=WOZ2$G;B;HUdEf1%rgTXcXXjVYjyk#VH9|djKLk0e9&4wzT@-;synjCSb3W zA?}RC;Ltxe>JA;P3{~oCbahG^2o72ew|GlI!LdB0oou)}b&qhmxCvr;l^XgK+wS)4 zcf{Yc7R7_d?oZcvvDnAle}o}T81tVMw!UmF|-rUR`-*E=*P|2knyQ-Wa!CiRJ$Y>St`%q#E%3n*ww0 zUXPlX$S3$^6$1-fVfoZ!<Z&IL`@t(Tcsny0xSmu6|U6yXJD1-2r%c{oi=r9>g&5!8~`Z>{G(G5 z^+GNjBYot(PMbFhkVfgBAF=>qw37e_L8@mU0Mm{a#^$xOV2NdsR_#6h3NE(;NkADM z_e%gvvUDiNMk2#PB&nx|a&lUXA0@ML>H(jh$0+0_1W>@Ye;uzchPvY`d9`6}6VtWn z-{M?L1x!b6D+BVbgLJ z*3wRk%g6|Nto_bFZRYD2KOYD9fsc=O+1c#;b>^*q3ZJzYNJ3Q=m@1ukdj-P9!{aH) ze{++Sg@r}Qe|DnEdHc^X8$bN#-=&+@%3}i1W8wAo=7Zv9|5;Q$ddv|6u78-PnLxD5 zj4i|MAOp`1EqTITgZc?|K|m%^i2iThe*x(kZfBIhxRR*u8@0Onl2G@1TEfy{SSJ_1 zi#2dRTyc+8|B7Czn0|Hdf1L%$5aNaRHdvPGHbcnCf60v#1brNa$IW35QwI9n#p!h=D!u4<(be_g1+hRf6NtJX^r3jbv&9Hy*^zIQnOLL`K@c! z;wd9U*)_Mv0fXFm_x^~CygWHB?c!5iyyg2F^-c@us{O;ceSBhK2&tYs-M;bM+}xLc z7N=vGm)z{0<*zPm1%NyEbgvuI4-QvA+KpM9!y;fAE^)`d*J`8BM}eK?M3DVmC9R zF?C33$xq(}U$GWGb}$ys@~#wK<_~&$PsPtgW#DgdAI3#0u&R z9~vr&;Gn?)F!h?x^c{{-cZ+}jh8GmDy5C=M8ZI=+{a(Ks{PXIQe|U1T zvW%uCpX>fLSt`GKyFMeq8<(>d?w9ECzW`Z})vVrBcILMD$~9J`pOVtCGr4U&jTn(y zs(2O z+CPnR?}AYORjO9dhq{j2p}JIoAaFk8X9ZbcY=e z@s2{Se~|NTt_=&gW!EL4T7|!T9}@EP2hR)IjErai2Pj;g$|j)%p>UBoUwYeOgoz&A zXv1e_rni*k`b?c&Jg(<~Hcv4&@QoP-^hlrBoGtzcJue*4<3@n7oX?Jsf2!(7@n)SF zjS7FoHTu&VpRu#}QG>S$jO0q@n`Ga^k=J9;r)Jgo{AuK7)hHVqF90%9R2&ZP<;taK z&Sl}Ehz1BlT}zq1F9W;UM$)xT*cM0h*c;+ZQQ)@df&TbrQ>W$lH)YWpu#(m0vEFFH z3qPE7<&4)}?9E-!kZ|g8f9>4tFL*t>SbCU1V#^x;Ub76!{h76xyDNl+^{&h*B2-R= z$h|f5%kI)*^%leTU0FRnn)giD@7_f|i!HXWX$dso0*7_KUib3-2XFqt zXN?;f<+S41xQyfad;DB6+W#`4RLSp3k!Supzf89p_FZHe{phvec9S?7C65k zbO-Tc`?6WE%tkYD^5EegSkTH5QAM8$aj*3D_ZPpgy^MYj=+(A>s+C>lZ4_UKBnptM z8f-!3EUyWOi4%X7WkyQ;!in=&(x)t8LQfxItEsJhqvD^N``6GX=NAknW4iI#LJQ4` z@kxCtsP|JTMwpa?e;^N_R^h-ya%yXzt~fL--2ftQ(a$K~wKr4${Miu$)j0!2bZ$-# z3V#|-Q(0b3jk898(zhr!RA5m;X`+6&KHN7qti?2cVTk_1m-3z~<^7uDAyEDjNdX2W zPb)xi0c9MtPXYw@&UH-8aK+WOhKqM;h|J~GlhK>({0B^ef8Z8DWvI2a5w<1v&9<-o zrN4OzL7aEg6%?M8x9eq{rw{Z0+himND|pmPP89!^bUu$%fSn}-1oGS(5IVAlk* zkr-9^9>QYTqj zgo=iHt@7R9eBO$=K3^v$d7iHQ5oNEkygc&5mQz{)pB^Y3FR+n2Cz2|w zABdhdm343J2C^}CiHNs9!@((hJ_trOfr}@>s;Y+7Gda6LT3V^uuvd;wr=JvHuObsD zT8b?Fe;HAzseMh%x3am$pB7JYmu#B`eG#f&cE2+LWuhFk^Kb*njrC1B-lQ0J3=?jDl zbN&B!AcEC6&nLV#Lw&L=-&gU@oSEafWr{H3|XkEzTy88`6rv)Fo_L`<(b3s ze`gu*2iod!&kZ|Za_9U74NijHgmg4Q-#&M3Mor_l?GnA14>~`0_1@K9>f-dfQm~n= z!31o)`+EEhfKzGiR~t(PH|M1co-wqy%W2_#Ye0=3x;ez;vH)L)OVa}@0}N-y!Ts-E z`vTA^01ESaxYy$wa|Uu%ga!|)DCOm~f9vvUrRVLvYL5{XBob3|lWHhSU%=ww3Z93) zeCC*RPu1OBZ

~xT&v3(XV6TSEc#SH1p!Gf33_kB=>2{l97MkmEd{_9UO0{G3 zAFgGByCWzpP7awfO#IphOnOuKo(j5GSVCoJK6M&^sHv$f3%?b?A`=o;hRG4o=Oo{< zcg+~vi)HBXYwE+YVHD1ym3>kPf4xRBfFa}1Zv!cgf6&sxh?0?IE1)A8ryCSXO*KMZ zwJNLldz}7m%Muk9W}Xc56@8)Zj*aF$R>J}TqZ)AoAzn(tDMy4wMsBaEkpP)%Aog%I ziVE_*nOk}SQY+WX5W2X(q3zsw)qQn(b~!V8&$Z`eof`Vc+GTbmSlCW+f8V~LDvoDc zYVCAS*tBU0Czz50!WWs_VO1-MaOtmt$fz{Y3N*c%cW94eF!! zm@1tcn-**g^%SNe?;jYDpK}5Jd&IKsOpsR+wpbgtXWw>nfKE5a)3PW_MD*`Ichm7K zCZ#*9_s0SdWdnX~11QDvf2j*S%%y9)KUjr_j+ONe%YJRas8gbl+i7na1?`EhvFGT^ zPp<$UFII3tl#<);emZJzv=SmTQVI&Z z2dx)5yC0drLeb=^`&~^g#lQb$+PAmbY6P@-oU;Ccy`nfdYj!*AT#NOyWt2(#zO$pI zle`6YX+=C2NFEGve-MO7jS|a09xpO6VFWMD*wjrJdo70btX`X*V9KtQoeA{Vt zb@e{0P0=$KJ^?(>iUS!bXC{EaULVxmSYa^PHWn2sYwVQ^=r)J5Dtm=yS1^}Tcz6tEo~9Uo&0 zrSLk-h^-sQV2Wjs!_EH1g1*0RTQOr(RF({suWqyJa^0Mo!D5TH!}Y(H-X{a5WYJ9h zRbRWh=!brAe^cI^?}sAhBA#NCL^nWmbu%zRBtJ~@qXmfxW^e$Z0r(2YrPX8i^4A-A z@fjK4u4gu0NoZ>`I66|rF)|ocy3cN&-Gk$wsNbx1uK*5^`1R|?yO>|fb|b5|l+2y4k4ylU9m+ z0}2UffIW>O1Z4$<_*EZZTQ~6wY9bJv7X)xG0;W> zT46YxDvTcy{6S4E;I-2xpvub&ZcANv7v*x#m;1tKeSx~R>-@LPOD8YENYD!){g;)l z*KArfe?-D=_kd?pdY{H$6#z7d*G1XGg9>cYgwe8bi5s$PB>SK8sIdpehDSJnSYgU+ z`&H6^8;C!@Ec(>PZ95AEmM?g9zrVu#IobDX-S|@^T&jQI<-SuoKrw$Cowfkp((~A; zcfZ*eaM;QSD%ZN0Hnu+AtA3#7PXo)SfFaJYe|EX?PV8f8>GA*2M~Q17Z*?&*prb!p0Bsh zh+CQ-e|>$uW9u}(pwtJD=L)S-z=e0A;!UiCklt@MwS_ACdA{(y+a(23606x~x%b0J zf0j5NP`PK-H}P5e{ua%W7)MTxfyebOQ1E zhhqQY!~2M(>3vuU(D4prMQ9{FJh)3-e<@pXlzN(Gxmxn_vOmeEBEWIDkQbU`rPmvZ~SR)e;!Qw zl?b#QcahB0q$H6fh+9?vU+`OC>$W)1D)Wy=$*D#vdG3KFfR#sL3+r?Cyk7noQ zV)ez+c7A_pI_5$RC`*7pUZmrhf6u0KlR>4%r^C|6Bbgh-1=Q)JxM9l|H^4Uys%Plr z3A$FefL6&U9|-U&MKsX$tJn&s+>}eQo&`)=Nr})L9!SPz!?3>o&RmdvB_u2gw22_t zD-5(-t~B-!-^ohUysS)ruakb0_Gobss{YtV<9(!JVlth*`voeHB?rhDe-$x#93wdp z|IW^!n-2iH=jIXsqCq9nfY;b)>Z8Ysn(=%1c&+dFaxK>Qc(vF4V!`eBWRO$2Uj%;t zKJ>*0`_J3gV*zb`g$azECOrAqxZm>Tz2+@NqT~sGH8$d3T|Kq1wSA+iI-Je7e|1W_ zT8q%@{gYT+>2)!p=d+y?e;1gZE*%vFg9$oqq7l7_Io%q~Eij`P$rK$w^zmuqa2MI{ zA9BvAJSNa7v$05>x_b)c#_l$%?AkglsMEy>2?W#+BP7s4NVh`E76VKebj`!7d*nX* zTprw@qshX-A)~I&3{?zSIFMi=l4c_b88DH3z8to6C}mav*fS&ae+G$;uu|(Fuh-}(A!&jSRuRX+uM9Ep6>( zXES;ttj-m(7VCbie_DD1OgHg@hsGViHau>J9oKZPICR<7ckLhe-JJyn(i0IeHZ8q- zcRgk0b>b=N&{%bISCzGw+yBS0+p0D{1~E1A#G$smtq{nfAHyWldH53uKmUY;HlHPl z(HEvT{UbDsffDk7i6w0+Tf_6aMCG@c_Gj0wGnj;_x^HE;e~}>r$*i)ONhptVD=MA^ zNPtoc&a~V^^XAdbc~Vx+JfB7UvvsAl$ZrU!sjsIa!pg06agU`8FDL@c^zg7EkeSo9 z*1}0_nqbYO(3b3XxtO{XH7@}7v%dMCGs3pE5pODNZHME2!s|hGwY89Kp)7lMYr7sJ zI7)lRd3s$Ee{|kujT{LraNVB`$<3u--`ME1sx=zT6s?%sZ@5_WI_^ZLgJ-srl7rx$;f6n8o;b zu@mwIs2`ShB9pOYyIL1!oFvF5+^F722SK#7;h49KUk$h(w}*o|xqk8}r+i!#buTYp zui;Z1e;=Ei1Xs^Y91>Ke7Q9I)=^q){cLBOekdWuUj_f5_0hjTu|Jrm(n`I6wC%7N47kbn0I z8P`})=kq$7nZTW$qKcUoKs1lCFS6c#v;2N@e}AXVWjhN=f9*t|R!n?#6|wzhKFJ@pgoEEDLw9?o=m#2p=x2*2IwQN=*ep7;X&__L}a zfZygHRJe(K{YbsrIDi5mgM_{@hghV7Wg-eiNh5;e;|+syd*G{<5=wPm9WPA2osO0F zN)*1i^44wGuTw?U`$olg+c+v3FzR=Ie;O{E*RYl8byS^o2T@33>jXRrx7WGN?KPtP z;BdT0>BWM8gJwX__e@UTyirokKe4n-(c-cAA>VJ zHd79&EQ=nP3iHo&Dwrvv>4k+UfgVuBrq65d;~a3{U;?>bd~Ysj-PpWfH}5nme>&ad zLfLk+&~~`ALN>^BFd&{WN2vUCx_-! zz#-6)M0IUD)6v9s8gS+2n=ZSse;ZY@)Y&gZ7JVWl#woAoJ-dH6>K^{VXU+fW)w^Y9 zp%qN$=Y4v?=jZ(CA|3<^$sFCxSyVFG+LUS~NWX_C0H?qFHt-!5QeRV(-PlMDRFhx~ z)$1q8gNb#aN&@t)%KJ_2wGB96)D}JaS3_Rn2b()m2p60chX?Ffh`M9cf9wYf_X|i^ z_Xi@=_k&fYgG}=tTdywf^aQDt$fChRseF{QzLzLGyw%*M@5uRWJ)fR#vP)=czGfp~ z6%v~L@()QDEcu2_w~1=5{(&5ultZa@2hwMtctFTb^Vr4aRvb*0_huN1id;bz2Au?t z=P4rzBpKI(=HfLZ`3WLif9gprj;;VBc(0y{3dns-ru!=9?fpf2AEND?&wJ-nR@dW! zum}!VhT|OTDb4t5sg3`P!v*0FWm3;Kh$7Iu5g(we}r$3n=hBvFG?y7 zLrZiTDVUXg1Es>wft#jnT)!u)(=#*U0iH)fM&^Z;)o*hzc;$}N;octLO-3qi#uS+d zVX%VyeW@%V7a1jjhYx?qFfp-HRyB8)`x*4 z@EjHiB_$4}455h4O|#GH%H6p#8(b;xF~C6$jvEAkV~lBPf6`Xb)D$(_Xq?hG%c-xe z?XYtLI2X4VPgjLVr_mAL)$95T+5rAOiGtDW;!HIv=irZ z4w`n$=s4GNVXpw~i3Y;_73ndWF3-lpJgu}3OqTn3YHWzF){fHD;Nne z?a8r};3AV{SwGCgvy;_`$}K9YMNAa8v>2^#oRtmp3)JJf+w0P{*n10(PeH&8tH&%< zxXcNP2QXNN0T=GktX4X6`XH-JJcg)KbC=# zK{@~x%GhzR~fnJ1m6qQOQ&29XW!rX!1%)hOjhHi6|6#_Qgu46BuB zf7QBIA z(o-3gCIQ|7X($Ci_hEpANn>wt-WI8}Yf{k4shuX(quZCoz**DKj#tD>O=e{Xz;`jBGe|Z|wN7UsO(1%dpM#h|zeJ#1f%q zT-Q3RNcOCK0VXbTwf+qNI6>c*hmNw6^jKc=7Gtf@?Qv4b?)79bEydJXZLO4be`aAo z>(yp4fS!*>O90!9+}YWq;kJ>qw#J4PTaV4p2hY#9a4!U+BK#dD7p>8qFNbY7qBKAvSaF*94eJYGBcr)$e)T9NVftquSl zDDoK@G<$Ow+_2)pmB0C?6*E*8f0m7bBO^Uu0}BcUA4jyU-2*hiy)EC8hkIj};sUH(3%SjNi z3gE>1oeOu77{8nPETw*}1E>d;uqy^7Kg4g6Ta$lzzLED5;di(7g@ssG8E%1Al}={! zYJ1`I?Ppc$@s}^E*pzpMe>WrSOn}UnGhXXx7LLFCD$9_>*^a~X%~B(F+=ZIfr-oXk zY|XpdskvH3Jz2w_l^EH6vv3&N$~q;XrKNRr)ZIO2|06s+T$X8aT~AGoe_a%0zOt_n_09Qd2O?DK4T@ulE|e}omji)Z-w@M#L@ z7%B>J6E2lFO~5cyUd_bU-@gTkWaPMhb#Cx1!{w^W7-k{zI@ZZ4;rA-ey1Y6N&7Q9E=M>MwLYON6A6$@E=1lkU)aC z{(JfV!R2QvSxn)7q)a~o>j09MQIal`Gzk13P)h>@EdT%j2mk;8Apqen?7vhM001O~ z001D9FENS5#U3PQm&>=|3;{nAb}aO^m3z)sYW<8&qn0-7qx zL?}|>#g}Q!qGx8`_F=Kuhkcxf`GH0Qy$E`DvcDkz(VnUzDN?GUM9S{zomILaORU3F z=bSpXsyanJ`t9||6E0C0xq<(YwQIp<1+Ub^{e-{L!L)(diu+=fe+18h6 z9E36c-s(i|DDhnX!*(AH?8J-P;W(a!?;Ra{h@2O<56sNShr5^IP#;|h9N$%W1C(os@Li36Ex}} z=P>b|Bk1ZB)RUk+6@Te9a*`O;z890V9}pVjsL|a=pah|BdyQ`H+96y71WWe~#Kxd^ z-4I>6=<2C&pL;VpSnMkRyQZVm`wkT@?e2Stf znoM+rLkg-`6jTv)Cg?tND-lo?To!?qMV${Tr?9%2;#Dn;349F3h9OMV)T;*^Fsgzi zC`>fnz(ZZz#W-@5S?$=eUE2fI?GrCdDh4-rZJ5(q(u;L@gP&p zQY2a45@e%oN{X(jGDMT>J|2I8K{cUC(sU8RrYvb~1Mb#z_@Rh0z^Rg?K;(dnZNo53 zMK^?9K~_X)$6czD3LiTB)I`9hX>d)mpnF|~2Hj$Fi*4%A3$BW4DXErR3_k_%)2JI< zp>UYEFotY^)cJ2Sb@d2(0D)bi)Z|Y?dyIY`xP1sqe&7pk)?Rt@fg680ArRXHNq2-C zV6hE6qThh%r$J+9K5CGt-wlH?3S$>VQ)F;ipy>o;0WIOk@1tv+3}g@`7Z;4}At+S8 z=MKREfPhP`YX>9CNSN>M(GC$PJC^}}(?dA0gHfm5!KE)AUhnNrucw)ewIGwxEAto4 zB>9Vi^A`(rOgH4a&tHFDB=IYNClfolW81dw*v`bZGqG*k*2GRGnCP3Q z{`wEzgYMeZb=5xTgWlI#d)3+{ELlF66}bkkXfUb)4DAcPy6fw@SkK*1H6h0E1i$Ju zIfTCkV>~Ux9!FSqe_P@7A$N2l)7?sDq}oiwkJhTB+qkFa7&|VfY5?1+A4m1i-Md_{ z+yGm9W-_+)ZKqe<&TQcoZLdNStkQOar1HaiKdIOY*tg^Y4LcYuMf*?(R23T<5={Wp zG+e^~n${WsO&WVNWo5958k$UJJ!J+Rj84+@3C0nDf^L`@CAGGJbYo6R4TFLUdC_tX zjX9}`jpEPNXo7UoHeh8>rrd15TNzL) z$?ChX&8&}%X2kcW9W&|nmbHN&zC%c)_}FKX^-a_ZfL%rppQg)z*c9Jn6DK{Oj$g0% zh&nFu(>(kRC0J_|{!F^BKobK$kOM*El$v7V^AkaC6l5mFV3D(Bxt|ob0Vk z;?O0DP}q9er!vq^#y#96dm-!hQFH_Ka4kV)R;tY8{rkcclD!n1DN_iJqTJDKr@&Jy zn3}l++qg_AfwugAzfJ6Q@*vguEXnlPvFloros<@=rnOdj*SkFMI(i{3fCsjTB=4#N;R_=yRybYwoBT_eU&!5j8uI!Z9by&;DYxrrk(OCZ#i>MR{->!SjN(G(~7sYrM&0TAXueMS@ju~LF^ox#*Ph}L+EU} zXpV#V1QhbgFrs)ja2yThD@P+J^yZE%4|?#u$!(;uy_9Pwu8PLU3)55+QpGtUI^NgQ zAitmm*Khi;Y}|FmSs<+c9aH--e|&xz?IVSa4fg+eZ#@Ot zgA^LBACYWAW-c;sgdG{Z;spj*i+g5-MjC8y`|l%#0$s>qF79Wb_BSE!7QE8^U>w}6 zeI4@ksHb@hPzraT$8Uw(s{t$mJTaOBK1dXPZ{uXA!B}m_v+i|l@w9^)xeS) zY1ZtTFVsyR)8lsV^i7Yl>7Hh`YC@oKVuk-jJW3E%&NXs4z8lW-c>n~AQy_H2{?NlICu{!9jW(qxEGj6 zn5xK6E>nJJPQu`;*{k`Hzaf-*W#BQ$;FFb5nhPKsx?rwxXkY?RgP`*KdO%=aFj26B z;LA$|a9`4Nu6N?*lM9T7TJb)w{VUtl^9gOk0QRNTe8OHkOvdT_AuAje=V@*|Om1n55^>(Rgl!l`E|sK16;TDdMBmTxS(jbGN^?KD zBqkhIO(mn=whFSHjeu4L#KfYu$n2bqmLTgK*?~Pe(p;~FB0Z3y00ihTy{RS^NGgjH z6S4^TTi-#jL6d9TFv%alR!JQl^P)P%cl6ha&v36THwIy#&Agh-hBHJkW1$kA|05Au z25u1KEC{wdeP*V1q~}%GlsX&uD(>?e5IP7BQHcNNh1qUTipayWW9ls}fHzbd?=x-h zRRGtx(jw7Cba4eowI4`n3uoao5@n5dqn!IvUIeUJw>He48ZbvFSqhi; zYb=C`wu32y04m(?P?@&o$Hsp+qaD%8RDy9w@H~O-qs(EHseMjb&rdXC>{Bp9utG8rPbOcZFcWy~Cc!0l}8BBa;(*XJe;+u;fSvK=D1QjpVw0y zWj1`VV+}F1A)pjH(;F%c?455G==2Cma=G5Il}iA&J0nIEa}E=;PqUw)oTD=uyo?e? ziHf+H0g#++vOYejWX%d8+;uiWTS91Xd91%O=U=DFjer6NXw=PTl?-~kQW}5Uizg&i zi!r4K$&K%?D*Evak{Rm1jl%z2xK9qo4dR~m{voZb%D#O|RQIVXub9eZC91pTStm-< z%#1OOHAzOyy}z@lE$yr{={pMb&4JvEPw1#U=QD^p#K!=rc&p3&UXbsRVVzW0-VlVQ zMMamGJ_3qcHtUCYgsmvX{H@3g+yaMopSgm9B8OQT5&=X6cJ@%8nI0O5?MD1iWqI|n z{c!i+YsPbQKa~T}UQ>Uk5%_qK7r2D@E{gj?=y(pQS% z#2x3JPIvLCl9%Nvc~Xvc;K*bGTx*qgN$@}FZ38SemQE+Z)*(4ApQ1gL-$8GLNsr} z^XBL=#AqeGMF{{Wo^G1(vK9vQbPCbOh2QtyC;pRkU0N69E1zv=xJM8-T9L4l3IB@r zg9*6e<@a}@*(^qo<#Sl3kfNtH(<=Mi+!uSDy`wOt{hqaRm^!tC=p9xOh=nlE%`E;G z10*DhG^#>WO0!PbEM7DcDg(kp{xz$)krO=-C|!=?n9FE*$UR z@4Wq*W&H3O`FrOIEmxfmtE-PQ`rtm4Q7zCKMg{OSY0~qwLAKEpj{Kxck<1~Jhp&+V zg3U=UZ{Xgw(HTYOs!32SC)a#sgiFhqv{p)axtI))mBU!F%8s_klDl?nEaZL9_Us9o zxbf==>e%h?p4v<*ouctFM_&wXgmqc>kSY?+(hz0P?n%sy)tl;^ny1%#QMF)9Ed+M+4>r6J#OX!Wu9wlOy*<_O0|k1oN>E8-m9PVFPPQ@(w1hz zqx+&|=P>#Sx41>oi2FaL$85(o>U#kY=lZx0>1uQl#2~TxyP9Hg%mHdQSH!iAikJXV zF5~7O04#MC-J)j2%!mqo9qzUUHsEaDVa5s$YtGfwqvn$Y*vr*D-7^`UCO8sl8?Mk5qOXich|pb_f)~h0_yQAo zdT?@xZkac2%RSb*Tufnf{7P7+act+5X6KwGJz$FrU_ROI(tr4IX+fbuZUVSA-Nv99DGG5{CzZq}$!+rM(DTOSW5k=_TFhQ$l(o&O%wx*o-4T=3kz z7CS50zhx2@gF`&Ik`7ogN6I*FDiyonS{0t_KscfwMVO*%PB7tJ5)nGZ{k_0w>L30D zp!oHRRv!**^+Id65`rPIXmmP`v62(VGH_cn@voD11OyvQ2R^o89sx-t5-)%tu>BQp zIfQ(j-)9D78o990Z=_D4+=zS8hLeUiWWG(;ao7kQIK72&b^}A9;7Yz7+)~e!hU833 zJCPcvp6ie7x6~=fM#=1*#|o$s<$g>3&~r<0M_lM3u18+$Hkot9iuSG8dq#AbBq77M zTV60u-KNl>-70ZPfk5X<2u@l%3%Q&K+c}A78cC1uaPgHXmE~%iTeVS(ye8$2i0Ja7 zrpk()W7?ia*aDLVS<5^56avb1lRS#EFvV(exmEFu9lwe3U}Um-xD^)S&ad=L1fpv* zz54ZuvjxPIwOqR78yr|`ZYO`Ahr;f1{W7UEyoDj3R4DU3Jzy%4t+!9vkp7$W`P+Re zL%7J}eQG++9vu|oz9Y6KnjJfFqxc_@B)V6OJE1MpsWM5o;Xn*~Qd$JqrsKBQ*Pr~XFfp8QvmZ-nrc6*RgT8nC++fg3USV}78gaDg}*f~Cnla|mD-oFEXMEMeg+*JxR%&$UhUdEczYJ~GP z%08VX!?yAVA02r5)&FXVR};Sul(XNZz8*bGy;X1ImjQ9Rzc?HaCR$6^@GL~5s#3+L7Q%hoAebxy*N8MttKG-D9Mb1^ow{5ehxsRm3! zNN05H&Q|P}-H4+{+=;2U7nEi?RCcGm6yRd$pwc^j@@)9Y$m*3_YM2lB`FI7Xa<16W zW+GbfzXEYjy1!~5i^bln&zCtJlvhSBzEm|eQiS!7sfk?7fj&cDUQ1HqHAKQai-0wS zw%4VU=qS)s_SpAai-7HlPM)1PnGW*GJ5|dTx}AK|(z2~3%3d+`ISj;xDA7wc$1}tQ zEr(CCY?K=~)sEQ z_oG?k(o%vPjW%J)e))H`v~#6jTg1$A8dwkr;=MzuSf!JN;VVc0Ks9#eMY8H|}BQWBI^8y%l7m~W6WK9b4YbU^Oz#3UhVooC^n(m=jL$xF{ z9pS+dO%Q&d2*i=EE_9y+VYCOni&(FLHYXl*=c-xUtgr1c<|9W3AQ3LdhwyzhdN84);W$g_Hu&MA(IbRQ)rR zl~-bfUd`fj2A)J8%>PThPJ_j3<-&o0ytt+~Mi8gWoj|1+sgwd)3cq$NQ|5FYzF$wG zx$zli)hv2q=cdk|$Psa|1^D>97TBXU&>6!hutvdKVGgAqLtO;p7)|bbv(e2M!Wkam z0Feu840W+T{^E&skWpSF{2HP~^nC|0F>Uea56*zPXDKlh`1|q=_Oe~Rv?~PfAfxu# zB78m)WAM}OOU^1KT2&Q`Xy79R^47?sX!9&uu^XP5aHW{%7^Ue!bp;GGGG!c~2Fj5_ zppg%}`x||tZ$;B-mkA~C`|oLv8wK#~HX!Z-zA)wlIvgx><3Oep1}1`-at$bjkt}kh-PADsq|$eJ6HE z+_K0RoWt)*{t2m(QE2NvQ$BnyS_+c$4-_&G-lP?SIrYh0r{!)F8cImHZza0i*W^_+ zALlNMI^2&w;M}>8Cj7#rglU#5iHHlgGlX?H$R6DrzY30$g^G{lc@b)jyf8FGzIT$W z@d}B8FY%Q~)Py%e3xi%1tU?sb4hl_yJ)U2Hp}=KLtmB(e&0~K)NE{mJy5E@hKnnv% zKLA=a0O|mRR6)xVcp+L97y;Ns`d4(%K*;iw*FYp%SFEZ$NoHnR$?wGNr9Q@oYPJw* zA=OtLU~W9b(^d>q5P-)eyuA@6Z^q-%Lzm~LPs_cqKAfkEs1*J)&F`B?@`}ILPKQ~w zYW+t{xI8-YhEn==Ycw9Kbj>cV=lp;kH-9PFZUS}NmEfC97+Yeb+?4p?%ARy`8s=yx zPwq~bkkyhLu^wfcOQP?x_tRyz^)WXle~q}qpx3!G1k$`PNCo?>4f?DF5ZEO+%5`qH zxjx2lVU2R`QnoIvp(N;_*Y1p0wyP^YUoSkjKc5WA2Ew4YV3g+9%bOR+OA|l{$nw)C&k91-i>3^L5 z;)c3}aOw6shv+cv>EN23x^@}Yuty$4*>knIhUm;**CMz?kI4R$7Bgsw^lEJ|9Dfxz zRk=yqdli*>mcic+cIl;YJw95Oxvp`VcuHLB5~vIzs;VadC1x7jVSt2(6)yb!R}!E`&WUF?}8$!ao5hAWU*6`^E;O}ifsO; zFP(c+gRM6dYMco-2A-b3-tl)YiSn#&x4m344bNJunB3MZrVGir%15xm*1ZN=eB+g! zhpXh$J-IuV+6dL8(tc#=50hc#Q`9;>Bo<)o2vWPv!EQa}%mZ0mowPo4oV0RzdHjZR z=(+56Mq3xIEQwf9gb$$#yN{lE+Cly>dCnUTz9hiz*%4c8-Tw2boHrgo4u>R$UNK5s zp}&n1&A}vz(XkpYf{0^|yy6DH^vRl5lp+CwaGI_6mm|adEf#qe_US zh~Y|yDOZv?jIk&}Zw;4kVIa~n7@3*@r3!iazPHeN1BWQgki#zycptYssZM*C{xa`#rjxtSrf|^usIQS zMK!0j0z9&hIFeSGB#khUqMhrR++dl+)>v0V9r)9S=+brDg*@16tWI6DAhG<6wUM!K zb_ZJNn(Lhekt`N>yl=0A~a;7#ubP73)j5Tsk)?smB0yHW?Se3}}=_LYpg^t^5ZdYdrb zt;odW0nQP9#Z^cC##R-ZCqrW1TRtDs8WBpUZ##W?BHd+dDx9WE@_^bkE|p)}zI)l;3HDfbSb1C{ za*Z-gSgr|9+MvV0dvgeTfL3fsBU602CYJQW2vkQC2<(H+g*vMn#X-@Y#Kp2#eMeU& z@~Vj?a{uk%cWw`xmC6Rv%3*v0mXsPe&yuJH>;mvd^Dtz=h1CU6Vi`eZ*}4EXw!$UQ z1)VcB$tz4$mz6+!e=i%EzjmO35$iGRhr|SZ_~bw17%d}O_5A=7Z~nrEtf=3q;=liZ zA1};JjbPX$CIG}+u95<%R@-&y!qhX_>S()07}c3V8m<9@cC}jFNB+^S7x1S{y7k0E z#ch=Oe5qy=?Yn+JFQ)49V1ja=juzVWiz|srwT9&m^g2arem%knT8m#w3#*pgviXCp zrOJgUg(|Fi#h;jT>W!G2G3pgo^y<;d6&Q8Ou3>Q3>u>-v%*_T&Iw_iF^g3l=7+fl2 zn8#ESToP4bu<(y&SpMHmVLr8AqTzghcDM8C#KBfDR;mbr#pntdBJ9?G`bJbmzI8d? z_f)mMbx9aW&kQMUhCM32!UT!ouk|a^oBcKCX1fy@5v?%#Z;v8i@IE;E&A0v5*Zf`T z&6Y=^Y77#7j966`7a-tAJJH1?l=(RFk=CUGn|HsItuF+{S%!<{ek{XW*R?Z>0;7G3 z{Gc{uEYZ?{@VU7rFHSF)$wEnCw#&J63P0GM0kyp9sdYSyp>mPl9u_Ld&dv}riKWO< z3Gd3?S&`!61U(9Z=1(9!r1I*YyAtSo5ar$x0bOnP^c?R~AH^Hh8*5G2cQ5Z7>>Tf~ z0W-pRK)!EJn0K!)Sb?Fr{DF}4CTy!`%#SylXYdbT03@=ZNKtwPXAY5UIew#C^q;n- z;(_?-uf40&jj6XWk~?1ruz;Lz>u2=fddtMOrj3Ne18R)fQ3z~&tg?{iJrz_=04oIS z8N=0LAmed>GLn}vhuQ~v&^>hmoWw1*#M_2v(o4BIip#FQ50_(QvnL3Vi~l z*h}4X?8Upo)h?TW&gdF zBHB+LEPyOr2@tG0@@JpFT9RZFEw&yvgf!DIT>^je&!VT?bx!JX6_s*oEng#sCQCRF zn+P^QRX@2H0vf(NRu3)<4E#cuU^ix)< z%_?U)q(Qu)nE?2=f5TOzWS=jkacNs!zjO-gJNEwQ#i>wX2mn9)EC zp&N`^B!q1O{nQ_PZziyMvUHQ3ktyG`uDb8DGiplG>d(D1nhy1LlVfoX$9HpaXPay^ zF5h|A>6#}VVK`sR^8LN~w^`&h>mGHBU=lkZt5k@g5szM)r2G&}u75?;LTH$VZ_DU9DPynA4J zU#za5O}w+g-3Cb}UJh78V$UB5$~L-@lJ_Pbsd;kgoPhNj*jC|O zZ*pgxePlcD?A5M7&TsByoT5y^Q(s-yg?3px+YK7MRjl<-9pQP0k4zRIY!<253AYxf zfW`|CiE?mJS>>j*P+J!KnXLPPo<#XYs$31`Qc}yx)KS)%DdijsYmDlL$%IT(07DG1 z%->^;f!_o?0^Jj(K$n)@xOfjj=k9amClaW zL3|g@=(=31>Jn@#Z5?m~5?wMi!)Xld*c{7Od!S%yCy-q)I?UB(yskZ55jwQWG{1YV zHRo||MJ%n)7V`eC$X7@u&4lJ$n;A|rvnv{f4i>;fNK(Y%)12eLocw!Wf`vny_!_W_ zU?|~-4-x6+kQ&gee4K715kej$cP4fqe-*VQqUYLx3q%_jE%|oxlrSx6c`i4 zt2CR~CvBsuFOZw3`-aRse4u6}?jOl2NQx$yh}>;R%1OjB{txFC`Q4$3Dy>lTX36 z=|B_jB893|*nRsOzXNYNMFt+L7-0FZxGD(U@k2HbRIY2^z45ZZI-Lun1}i)SfBx73 zq&0WIHGc8k)#6_OArNJBASk2jqxte0DczCI!+Bs=oqT=yrMrBNY7M?C*egEEJl4+; zpA$O4Yn)n^jOTyiDdeBQU&|s4ohxD%Eovnen2RO{L*Td_O`1nWqG8)9dG!p|>MJDv zRx^HmD*T1R|cm*(L(PrtZW?jTtbZt0lAZOolK&^ZR6!V{P>LO6c z$oldLwVW+bCm+rs#Y09dvr9aql6L>|vgg1)S`ZCAE=xhxB8S4xkE<)li9z0DJZ^1M z=8f=to=;Q=v*nAHL+PciHr`s{UB2bf%5N%FR-WQtcPIez27^kT(FCdB(Fr zP52(%BxU~bkRT8II{7I%(Z@lCL0e<^d;N``At$#lZIJdG6HD=}3_G5WGr5z?ZGv{N zjdq7*4~j?ndix1EU`fAxu)Fdm4pA@+xMW>hjK@j{%;97Rw!$7O9$jz-68>rLFz>&b zs4>UJK?>Agb+}06wRneIj8Wc9LU;0eyXlY zfgPkLg0)yo108v(XSpkel`Li!Bh3jHw4)RE=0h8Rq$3Bp{!HO%}O13S`WtY2nFsZO0TX4X3djuhEI zvFF3Y($P~h=)F@dKCBmZST{S20`@;%$o-;!oaQ-BBT;QWeUu#m>EM57GzQRBA41gy zNPquMwCOpKci%p5?{(+6KNd^=K`_{eQi1s1K7GCMTIo<>8Uo=B_u{!4@Vt#4cn9$d z$S-(qJw%;RusyFQi4<9kf24YEcY0ytL^>YQAQUA_5*x0fGE1F|Cu}$o87rqqi+n|) za*t?DfN9QwHCW`fET>{ysoYo<&Pjw*MR#dTdly0bL`a&WY5u0nx00V&EA>nqdm^8F z|JUA5f;c*h567m8NO%GQh#Py(O^^8(SPr2dX`U>DZSq{mbtZqw6bk_5tMsA>OLMpz z`Wk7W0;N(wj#pDlWk25*4(`)muRzC4{-LobHkgMdN2=hiQXON-XKUzz;8L(Bn1aFT zWl*_kkEpZqHL!noR^hsRLqtgAfB==SHP#|}%KaHq{A^TjJXYD_fwO6RR#Z9x{4v>s z+!Ii(PS6Y?j-^P5xSIii5N?Fl@#0?A1C!~7Y>EH5Q=7illc`RAI;Ph3x=$SRY154jp4M)2 zi{eKCqhOt==P2~#Z*Q>%w!X(XboqF#k+0@F`5dNHo9?doY+Ji!=Ys$GN$J!V&dqL=- z$5k7$jB)*IF(riSH;X7(cyzGrGBsVUC^d7>yH9H%g6IQLc6N6kTqd7hCKvIA`uW-Q z`)T=kshnCelZ-bPC`x2?$lF{p961;^?BwbftU>PHSD!6bzZB=eK!#>E)Th~6#mY2S)G^#RoCZAfYPmV zZoS_{;QaDZ^Sd+%XMYbmj5_W1=<+?RR@MPec6$g`N#q#~s5an+pq|!iMoo*za9ck{ zZN;f-+W#lF;~_S-F^P@Dw7iA_akx4*6mj5z4IOI@7?(z2fU7E5WWxV@p@=G7$0NCs zUfez-)jFu;$|%X$OC$Mk5DrIsWCSfSM`PE^mGjSNK+|ArFgVpm30s#@#gz@?9xj-s_ZSm;jA>>v;lok#_gMJ}wN4Ird8q zJ<&;1Koh4`b9Dj|{%*}>OM#TK{4zqUuM6IIO(Jo0QLH!++pLBZ1CHWr zG0aao7~OEl6ACFlbaB8qDT*vBiu`Y<>qWkMHg^PC19WqTMhkSWxrwgSvF_4V?-?Ge zo{_5){1C*_g)ml{28QQ}w6r4t6RRpc&e(BxjDm&t&$mLnDlO$od|7{cc{rY^uPvGu zsM)PB;EtMcRMo+aaE_zfeIu{dL`TDn3T)k@ArywO2XL^-{#g))NL%rQ^+gBGs(Vw| z=t={XB|Yd+&(T#zbq($$`8$1GSKu=EWw_yf>lG%_J77=20B*26ZXGmRZ!ol53-~=Q zd+PMX zH<9fW>4!9>^!d+q_p|tgbe^lS*r`y@oPz+fmX?=MU|}c<()hB%?fj>ssqtONs{90a zK6-X}Al9aSce38e$R1zN9UP2qhE-jV75yr>x12i=v=A?j;-HE`l_FAb+k())dmEsn zWD^!yk8nnIE=es-VUGaDGijNBhu0kdt?aYP2@ z;k2_wXiv!Sh4Uf8YAPnevY@h0nMy%<)T2B^FaD&0IB&qczYvsfcW#%R6svvDe%hfa z1BH(~CcQ9{LL+kur#CLzv>-3qJed-2=|RK8vs(dMd@L<&$ntSszUB^?8HGR{Xo@S~ ztt-nkYK~^ItUjw7rn>-e;gJYx;*kLRQs?KZ8W-lP9%`@>cj&aNo{%%N0-yBaKp%7= z5#J+Li11M*z%=QkE+H&TEY>IZJl6^%$e zW<$$h_Ko&7&!hwzW#a=`&2E26Sp|6u$Bb_j%eYR&@^gAelE}D_Vrb9ld$DGt&(G*A zhA=`}Fwu2%M-{T*16;n?Sh*j-;+u7yoJ}5)&Q}}<^g$H5r!dORf1jzmFfyFZ?u#f( zHdd6HPGN<}vsS;5_$>g-i{?p7H|vPmt~VfAe_(adOK$3ie_-Mw{2I6M8Y2|@;wnDS zHsQg~!}sSK(v_YVypv@J*F0R`PsO>a8@BQMyg3rRuJ{qj%!0R<%rJ{}RxQQH?iMvU zh~=qe=Nw^4-acd$%Ww)V^FC$rkK}~XG-GWr;gSScuvW2+LDd1LjnvFn6$3}gWcBQ; zhE3E;?d)vQN6I|BxzxK5BwMC7vzl;LkTi=ojajR5?P6^bMk=1Y@K%UIRIVFZPZ%Tt zCpPNlSjBXfrs|kj#Z^vMpL7UZ0Au?Z?SFZy6|NiCmmRpc+#^QHZtm9T8Kzc774iW)f(p z_PyZxt&K;rbR~TB1RTil3MC5 z)k<0>hh@qBv_hWC*ePLdk|=)(7IqdE#DJJ=Ze7q1?Bwd_u-4z_v(-1sY-OsS*44ah zdD@{@oQ|V{UL0mPXttv$y@hi6Crv25fpF5{N?znv;9>p6iwHf>LD0Y?oJ?H~QvXvlX{ z8V4r|OhYk)g)L!jGv3Y5G3@2oMWRZ#VFFGqy)>U()8D3>w=)gQfrpR7c>fItg!}3d zJ|Uh#&R1rNTl^JS{McKtzLMjAhF}8-17|FmSvP z_-*d|jYa5G@_o9pDStRL_Aqsp^3>zpHR&a&*H}lR5|L71D>DyEVO^8bTH1i$GkmRE zL`4%|iux;@x-$N1Ww!G%Yv3Fb^{LrKK7GxTVtFatdkz1eUkE#MtA)yJXEeX=?`h8@ zN+pTif;2?d*CJaDi}CAimF4bt1IfT@puI)+3YTy@;=fT~>H4lAa!j5UlJd-@v+b=~ zw{6ptcN_3!Q@@NZ02b}9238JEy8nV&n^i0cs?!Fa#Y)n)aZW?LTM-vNVfK%cG8Y!%5h8xzHj!}q z4xg)>T;2O`sMnIO3|yApi%{(Se{VA;bFV-;{`s_@=sA@3rFsPiWDrPEWh zr;KXT1VekfmW+;(m%N9gR$Y5w!%_^h{xQ@>XLt&3tA7j)_Zi`dC|>Vr3jQ{OT}Zk0 zQ}(o%sKhE`FKJv#+iN1t5Eika?8GEOhfIp5Oma4PChCiqn9zh#jmMyE&8U_d+FEj8 zoNb9sz3ouDQL0x1od|xT7^YfwC&ln6mvDA~)SJ}zxqJh;Nwev8;p*)w;_V-CymYnkar8$0m_YK> zweixFZuQI#;m_gA_3r#U=y%7N1r+e*eRuw%Rpj{Ad_}kq2o>H-Io%uj&Yx}m+~JnIZl~;gutO~LktG|o!5%E zNW5bAs;7;yWa<&KibKv_&gWUD*7)&QIl(F$3dpaQ3WyfU!V9m`s^S{+x&>bVc&67^ zBE%j3+FIoDcrcUq@JNZfLX+{jeC|Q)l<8R^VdVnjs&1-UY>!qClawip_DWSwl~`e^ zN4x`~U|dNpfPMg1&1Af78Ytw-BVMc9ynj`vN7ulSA6uO+6uMQxZnh{5ncMo)<`uyY zWt@3FzPV0}!)~gH`_LVSuU&BLVG4bSB>sZD5Bc!o;qNb0*MRMSd=5-S=OjUJCopG?!{eE z*T$j>tpf3v3n}F5K(gDz>cGbin$c0`_mK%R!o7iF)QIyaftIp72JH&e?NSB^)TP_P3I&EW%VZr~piRTuOMn%j4l2+x@N7XtRQUBxS zIL>;jCc`0b39azKEEBYVyf$~o<{%{H`4^Q$OAYdxWkJUGRe+_6NvCjI5@d0p(b+vl# z1ErZT`Fgw3g=u3ijOPgbkl$Ya)gZkj9!7yfb1dO^HFpZ6_@cAodRGdJ4>RT1duw0o zc=wJeO~vue|JhUmf$!&8l_)l{^H{qjt(q9}Kiz=sra2 zyn9m;5kYYXkPDDy5l7zKF=A1}!2nfNsEk=qqI9nfw|kl8s4X#~B&>9M~(ZhnePkR6)Ku=T@(^2-{QWj0sP*yI@R#h&=S6Akzr>V&O7xQyh z6ioh$9~5XS9xj-_8JMKd{exd7BNtC8wd@ zW8C%$HunmO{{HB6Lb`TTR{12$<%xLauBZq|a>x;X$3GimWd3zFo0KbqtX++fAB4^S&Z9O6!gtl0!mqx)Vz;i!@jaBqcm^-t(q?#f?!kQ|0OpF3y|PiowD zZ=}-9SYT?0|T*qj7%D>SsYw zQ0;}_H*TaiX?k&PZdbe|*U%od99T9H@4vL2$i7YO&m?uk#ccqmg~{dC4U6AyPxA+d zZfpPi5eWk7T0IBXOs14xU`N(0yfgV9t^7Gnqzt%2H+$MyOy-AFOiuv64(oGFuKyT% zx#g%E*KerwpcuWp&D9`Z+$KYSW{yF=Ta~RaycFjzmcu!OFT4N#@W^ER=ds+@O-{nJ zxC@JqKTqUfm@F=cZW!TSg@4hm#GJ|XPH8XQ_iu(~#qqh;!e`!wQubtKryqV(53I8D z4QMDcwb#|Nkt_e}sD&mH&XKs9+QSC1bU@IsW}IInkpue8z`jg9iX~T`{tbf?VksZq zAC6zhk`P+rp$jyx>p^Nk1+$B@RBy`6@1phjx!CZ9Tb$!Ffo5($g9_y)H}N|w&kb`6 zx&5Jl_!5Cb^C7osZZ;?m=L+Wjj6FCe=k9FG>C2-#;1t^fLI21hN=hqom zvk7GASI<2+85H+%#*-o{2%Z(%a_46-{k9+Bt2e%`?reYj;{Qkn{dDDxIi}AZ)1E#2 z<1qM~sY7|jMx>{fdTw^5gUIu#r`%PiZqC~PM2=RH3DrXME*5PMgI-}N7 zx~Y}f^;Fc=YhR2;lcf5B3Gc`W*er9?J)ioK~K>7-{ws-*01-IXUE z*8FWW131qauA6>EGYU9atR1;wy{GKHoOW&_Hcb8Usa}k+e-+HLm#fCiHueL0-}ta^ z*R~#}#5yYJ+q&2AyO~y-8Fo(1`T69o4^jE`+~_>!4*RU`c>SGO@7#x%dc#HNz2IVE zb78#UYm#>uh^tj_+*#q3VR7M`FghM?Z#}v_U04cvakoP>|J z&_}@XV%GZG_>W-`y1+0Y8C(E*kp)YZU}I|-;`RO4&d(!)C;f$Yuvm$sluIU@76Yrr z)a#LkOWy{Bwz&O_5TZ{B(?wB_Z2B2@l$njC+1npaDtOTwCinjTuO#&qLmc4*wT-S<*nS`xqP8XWtXLJIY)$H4KasGJI zu4qo5$ZG)x)f#T#mNT>`WPEYgu&cL;Tfh^{L^842;4L{>1Y(ychAGK zx{ly)hQ@4k6pU)l!FwJ|bTiEA$wRLXl95IW0>>YTIVp95E;~a7^f(7jRsx|%4GIwK zn-7rQ=S02H1HRi!QEPs@#PN|nMaI&T-oe!lpL+^F12rCjE5T2-V&u+7ghoh)riB@` z60C}XP3e8$6`yFs892qih{qA~{K1%EM@MA3*FZUBqL>q17+fAjG|?nUVR!KR`QlbCx+?p5kaXeRENXIZMRhiR(qcS-Q0;0cH~4iSY}5&@!%z9aMk5zM@$s zu6PKt%KEWHG0XdS(aH_}W=z{QWfdufvIlT0+#je|wcf{(EcG>60Q0AZ`|L)H=`X;4ewmgr0?#NH~(KX9X z17K50Wjb~5FU>{{QlYnE#;hrNdk&3@wGb9PJpQcsu^Zi&oii*?9^W-k^6LU} ztGl(kp5630UDc0;*KS8LJ5y*wJJ4G(E%v})pyvCxINw(`JUfVhZ<)Al6AE&lz-lm8G6coig@}=oQz4rq&o?F6w^3^&vF16aYKl58+ znlsEx8D@Rj>x2HhI0|Kl$36oa#ayzpC7d3rEz?~ijTIH+7v4V1#9ab~ze{y}81O7e z_AmbzU^RAhZdU`dO?{BHtQ$Y%-U75I$ck6= z8L>Bi%%1tnPxu=q--w!cf?6U#t;gxK!UldtQ75}%twwmALDNJ2_=SH}9FWdZKNE4A z7QLlw^{Y8d*8Ej@!xz~HLi5Tq9@vb!OJ_iX_42P}K@l+IXC{jA>c zb;4YE-4hse;gPSW**w4u)>j;O?3KdrLJ-Ga_Tl_pd_C|3=fL`#TxL@Um`t>;<|id| z7vUixIq_LeSdeX`cJY5*eRWVB&$ji!gKL1`?hptT+}+*XCAixJw?Kdc!6CQ>ch}$+ z+%-4{cYWO7{k~i8zOI_;>X}`;yZ8Lld)MlQE{53&OO-Xf%TiV>&F$n)Wvd3D5;?;G0PXMg0rJc-cI z+2g1KBSb2WA4?iq@};Wv;&3=+k&I~=gdrBQ$??~iB->x}4Kkuyk6Hu1N@1z7;_0%x z^Wgcn1{_P|i1&Q!*Q$h%Ewq7!8Jj4TStBolOHszP#D| zd6wsW6Jzg9J8Mm3&+YM`p?PK7&CbgRJ8jRsd1AwgJfBlmKW9Gg*5*vv|!ta@j7L`RwX3?o<8EISO z=1!$HsuZ}CyQq+fbJoI>7FOlc(okC%YjmaUOAfEQaH=V0WlRnxa8AOhaW zDVe7#8K@FwQ`&0c>SHxJW~98?jjt#Gx;`?x94G?U2KRSf;}Bfgwv1)1xtob)S!Q_r zaH3Lx`_KNrbGp(ach`4vJC8c!`&#B`z3b99Ss z)CEGZj~&&sa_k=0Re|9B?da_64DF~{hhe!6VHMjJjiZWaltp~CR9d8KIox>w?sG~Z z?Bw=66__G1)GM)H5~MtDg!t&W!G!~g`x(3y*am~okw@8tO>k~0p8R(rzo$!JbL(|e$h@_#b<6Z??~Qw zH~|jSv@Y$`2wf13h9@s)Ts9H_?W_3N6%rj@8*)@a%oLpb>p}9|NoA~v)Z0NBwrTTI z6CZ;f-t_y#(#gI1+>D% zSKOuT)EffYz6%g7xFIukmbQ6s`&lv0_nE1DY1E!BX_t@wIfG)!Si0>>>_y7Aok;(BF=ZGp5h=hG@|UYCODt8nsc=RJ8NjmY~H;6Z;ItnhL00;-z)V? z^ee||w$SPgOs0qU*$5^BI3$nSq{@ghG0$&Uhpylx1-yviNfuP(QIa7)&L(E?OURE( zC2X0*dq<6eAcAy0eL=IPXW5 zXTLapQm1K5LJDANAb6;S%l|6|Er~tmT#raKM9?B@$%%xo?Sq^lAKQ(Bo>>jf7)8nb z`lTLas=X@C1F@B3PTLXQ#*@FF9iS8vnJfw z*POjf%pf%u`On0oemUA z#aI<+fEf^UXPb3WK`Rwp!lcf&vmoOPuy2+4ppCYES!6z>^TSwJwGP6Yy^Z-XdAhn% zbjl9jxg|XIHpJ_M60mQDTOyvc5LET@vd-4>s5pH5+a>thp~NoUu;*zq3x|Z!bWW>P z(^;?!`#1(SI#)DLhy+jYbt_=w+whXF>hs;;H6;afdaBn^iZ=C^HJ$JUsJz zkeC*#L~<7{aeBa( z)MtmY$wn87v;aBU%~^^Y@p-ui!L&*9cCeQ$Vj(l#K>lY6Z!slP03PY0N~-E9qA2Ry z^u{P4GbJIe>X6j9B*t&R5g6l!uY^c+4hLm1;E*_cO)m2EN)vH33fp5|tDoN}PkBTY z5wpZ|vv6BES~ZiA&*8Hve-SD_zJ}gTz7`1|FH}}_1E_p8`W!$s<;xiUQk;^o-%Bee z|74MaR!!Uwg?wDageio_{Ov1}IM-f%wSFfK^hZ{_%V^n|^7JTk^4xJ3I2l#a<<7hU zyG^oBN6r)1h=RImEQ){~qebp}pKr+!MAY#NXR4TI;ibjpv2pdoIZk3}JkDb+gqq9+ zO}q4g27u7a!(mFVv=0wi@ngcIKa6QVtHe((N@FP;V&f`>UtCA{v7opv5Jrkzx!RTu z4Zsh#xRzMGwo4(9U^d`V+U?X#F9sbhT$EkgR22 zX^H@~u+dpFW9w<_1~Th>sN-Y!(EwBzL2(w4J2?1L+fIb7hHPq02_tneq!u@hBVC;cMCPLD-DfC$%B0(mIxtfFmxi$=Z3 z#2r-LkL@Z=JUD%ysB?QcT6GCN?cOqcFz0St__P)Z8Z$P+ z?V6cBZ0JdJn$x2IdhVxIdaYvM2Rj-Q!9i*253DzF2xjzxtbx*46FSo(xlcDf&X$OF zn4;|+VUCS7ShMEv7#G-z#xb$Qb45cnl!k%U<#@B2JD#2whkDaqTX06x(7(ol20^)p z!MnvvLxv(>aQ%dvzDlAZ!|`7n4gu zWEC$PhDrs-v{N#IXxh4cw zloJw$(w3hu-2xSV1Q@0a_-^S|nKn+&_eRz+lJ9=B{N9=8@b#16-`3kAt=tpR4`3d9 zl{5_K{N@U>KAR!TnL_PD?>L|H&oeD`yo!MsKG1r-ApeuIfDQKXz^>s0wb6-LXmH)t z@c0&BmOOqyMJbpSCv^RmQF=YcE%+d<8;es;Uf&ca@4TGelQ6y^zkK=b78gFE<<4v3 z*~iDnH@OT<*A8Q?Mwd#46*34jq+vK{5M)?W55(}BrH<;9P*D}b2e)bP>Rd#*6l|)L zA=NQ=d5y~u441(|%FF5c!%jt5u|JEp>}uL{|6&BL(X_q+Rm2-M_~X@}5f{r34k*vI zPU4vs;|L>%YRTmy<7|m!!n(mS!e`u!NP1dM=i=)`e{kzq@irUS<7m}TX3o7(Vrzf6 z-XjJq^Yb?Y z3Z^Zb2<}b(g3FlnC!Hj>_x&S*%v1h}f~f<4F%*sD67znC6?8}BLC3?ed&bz>NW~W+ z{bTGc@MmN**KfM72wVMY5=ZV>uaDzPGz})iWZ6M| z+#^}ZR=cUG2B#iCn@`0J%8DmGRanFQDmQa=dNPEsT^K8a1o-q=LIb4>T?UJ;BWE=Rk$NO-&lq*> z{mN!4Jg5iy@Myf@TPMn;q`p!^$A%dZE&VE*Vkf-|ahkzM0pD401r1DkGHpqgbp1jj z>GrvRQQ|IzNCNE!j$C>+!l(P~&C;lKM-bgw9Y=1osl8D&rp=@NkakdZdd|nLbfZ?3 z@`69=ZwMcRZ!DR!#5v}0E3o8~8xF<$!#cdiM&XSrnfv{fM*7aP=&+A9SXvB;gh}L> zPT#PaWS~yyVv2uv!!QImzTq1b$0Zvt6v+&x&!M?XPk^3xYM7BC+}0ia$`>Z*q;rvB zi-rRX_#Zyh<ksjI*5)uIsdIIr?t?)CbD>37M_uDDBL|&wP}^q}4RXJ0t`?*k>iC z((d}STuV1OJk?b3m{A0?=5=H7Px+MMe^&RaJm`Ou*_++V2ubbIZ#aD1CHNI``7ZCf zhxz066%^1^QGrUFPw@e#Q-i(ALM2BWO81_Ay9MiBkRaM7HjEU`yw-S=qWATw6+xj9 z3#_9LMR|x!GME6n?53I66wm-|dx&(m?Vzq{!{C)MsM+OEh}%|8_nsN^P1vW}gHhuB zdV7wxjq(D#Gag)qanz{src|E!u9?!N8&yWGY9X+v-v3v;8hV_qE?uh%+G3vG*Jc>; zI=QQs{;5K7Qz@A_B3(_@-0__pRucu5GgYh|#z)W$N(m?R3%GMum3wBuLb!Iv?}{iY z(vwMn3aze^#9QK(B*0VVPGQwcg5~jrBpW{}41&mb5}|B1gZka|s}3G%B%%>9{`44Q zX*rli}I(SIv>?qFK zlm&OIHF0t&TP*eSMJy1)8I;fiKP!?4+3DJ&dbF^R{(6Q=YW(2-v(27WjdjJ3_(XKW zr_;#K6VxZMOM7fF(h#}=9e4MLU3CcTa6k?|OH{+Ff>IQTbdYYZ?VXFY0z+=p zi%P!h?c*Yp%m~t4M0I5oBh%oItN6gUPT~de zMU#y@dzkIOz6O0&fzs?AnuSEH6+_6lho5_RXoFV{%#zo64EJFI2DcCWhx)XYU>9-&!H&E_s(4R z?GoHO@NYHf245w0#7C%GqTz}$n|w4qFo$RUL<1kt&o|2rH6)DG^6~15`tS;nDKLC(Z%M*%iVihqCJvEu~nY^9sx0#l4SO;DhU;nw7BXH6bz`d;G7R zB19xWo1U=#Cu!f*T#E{y-7ZK0U;Gv@lDBeU8>w6>K6gytP+CgA}zP=!M=! zmm|Ak1vTEcstSv~o#eQ`0D89F)&2Mp)7&=T~ncO(+&PbYJ zDCW8P|L%-~D+)yg?XbO)RM+BmQgqSLhC;MpH2-Hd`FWlyG1?Z{sM83qIn~qiFYUVhyrHH8+Q5xV zx`6xWdjISj$AN{7J{m4{e}#*F!T1@gVg#Z1$Tl5~R$82D-f4oy<)m*KL*13x!?sZ> z*jgP2YhDBYeDYW>OBLrr-R3}6K2aW{uRUSG_Lp9S`?sxsALl?I(7U&X9F&hmvdZ{I zuCgx$0^x&jK(01l-0f`~zzWNBfY!1!R=`B%*?F(vXrHLRV|54*vpuV?arb6Ipui*9 za!p>h_O~xigIUj~db6MIXfSnXsJ3f;;+RUxT3C#k&FPD>-4G+kPQ1Y5ybL5E{ z^^qRSn~1MlNr!Lq6eW^@W&g%Y`oqTU^Rue;NYC{oSw7k^p`N*7H{OU-;N`XbWD}3mYX;syICu`lj zeew+D0J_^PhZ!WF9fO)$_ZIahItMsP)*;t(55M)}(4AF4BVxw5@U|eTpM6Ee*FAKv z={0MoK7Y)41^i#h63Nc{mU9+je|}DqzD0R>DWCTa)ugt z%T+=)4V}t@jFaN4C5ooIkl`59z`y*~Tvns6UlK;>h=S>ghl8fGu6SD=(MZE}uB4nN z1(8X5dJmy_TvwUWmXcK#NKup1KVl9y=qW7vxn@#!#%?b^|AcEW+!7{=mvEmtKngd zato5C;%q{ez4KL-JB%Am_)+wCA{BXBp~#y!(6CKOc1JoQKzFq$Pg)l{d6E=$y!`uc z#c?E%WR~p4UsVWQHE-P21ZsY+^Pbd|1>9Hl^Awl-Gk!((}r&0Yw)=gE0x+&3j zj+q@yMsSrNDo6G}DsvMu8PPQs`rIt1l&LI6dP_!ejPNlMKU5Td@GZyttvKGXcFHo! z$fk5e>KA|xXy-{BloXA0HpG`D;#Ynr>*h|^JvgK?nbVZ$N>8^FSN(2RdaN!nO;*~9 z$^b_3{!Z-DBrd{?v?%)TQiuapM2Kmq2$kEWS6@lrN^mdh5@nisEV>cV3uAx2@rJ2Z z&j&REuYMD+lNJ3a(9Z})Vo8IZ#4G%tfPAOwg6SJ8!%(!tO9OlrW1AH>~itBD+4;Q`VP017@H7&R2wL(40vtci)p{FM?tMlr$ z&_P>;OV@$bXj+2X7O$DZ(xsWUL7@KoFj8sGUR0?Qk2(Lap(B7o58;hVwI0VQY?g1b zyH>L~E`*|ysO87cN$2^fz_E^7HyD!Jp!q%u=-6x|4HCOKbRIHrV$NY1IM(37pyj6! zc&lY3jJVSP7qiCU2KIt%0Y;{2zcLx8$1-YfqDG&8#Z=8w#zWNg#}KVV8MF}f*$QS{ zVgI^?`1T}49oF|>+}n0JihsJEfXg4tXbhw4P*|Y=i`eDS^OZ~lfd`1#z{2u&H4Cj`trexTT=(T z?S>sl9cD$1so=S`!M*Q@9aQuYl@!_J&cBIi$SQMK@;^^9Z|0Lx_xwq)%`mcL(ejBw zU%Hqx781uQ?o5z7nrZ&}=6s>1i|P%WF3gZ1fLL+N8>%Wt8eh&OCop1_hFGwlfQ*yQ z^UQS3g}3${BpDc(o)~xK6XF#A%ShyLFD_w}#ZtvF(^;UvPMDhMkq#Cg z!Q5Ve_ihxD{o+BEH5{#XG9hXF3>O9;Th5kMMv@it93v&r6%Ls`Ct15M5^MAOR3By& z`z$otJ*gl2%;!4EI&ao#b%^T9UtyB^QTs`YT_8Xr>m}*l`U%0+$v&w1wUwudZNC3B zJs5eKt6xD@v+*oNRg)9Q>V?nHru24;qBdnrIM~|J-@MN~npgN@&qDapQ9_a?w%uA7 zU|m5-MriHC+23L?UGvGwFW~zp(wOK!3Kk-m-6S{MOM|2ooj*6F2KP!gh0-2kY}__| zek%AL0}`2f2A2^}6R1mP7$?sY+Jg3XzFu&I>}o>&4wME;O4PlzVvljhyI zx=%Fy26!YO(#RD!@D99zel*bQSmLO07Mpx2r}iF0z99Gugg1V z#ax3r!4*K*P1^~L%c2kZFG21?kI(>onin{}ljYk+>_@#v!yLh{c$1G^bL`cpuD)2| zPD^|ZP4zBwg*}Q(Z3o)?Mk%_;5I-ARyy7B`M@v&~@B)B$4CV2PFo*0xlt-k63lKQJ z#2J#Dn_s{3dcPnDB6JF{^g(k>=4Yld=$`j*-MMCNJPdXe2c!Mmly4=XxOV}wyKWNd43YXjOs`izI;aV}P~p+Db1*bjD5 zm!|8EOEDT@cc=sVd5iP9_RD1Eom<_;bkTONyk`T4{pa7Wtsi208k$=P1!M4UdJG*M z>^ELS3lY0Tzk529GT;0pArGAY(Iy&%R_?TFqfw`U(i?Rl`lJL;5j^L{!M)xo$bMT! z|1EW|rpr~PVD_p9DcP`g3I#?dr>#U(@qJz?y9vKVYc1z(vcM~hvK-V~?uPT9bLP#e zxd6!AJUEynng722hv@d-Bk{hHsw< z!8UuW&=$u3SpGCd0JrU75dAN9ClKjBuKvHvSkr$Dq4)8K>i>hc0R0~$sOWzjEAO*H zFD3tDpRvyd{XPpku`dLDm<`4_;D#y40f7awsKJg04A3lj;Pe9(XrcT!TWs*b&ktaN zLk4KOf`5iOhYUpjlt|z9H1rk*!v9*&&q8qap%k=gF?jz_2F9@j1coo91?wF#Ks%L! z!;jRVgUZ0GM-nhC6(F!m84-AP7Xz$vOaOgc4gPXWPyDYk;65w}v`Yd4efTfO^|c_d zd=&wB?3e*&zX=2e>hZyv`xsyf2mv(mB3KZ@MD(Ae8sq;4U~&;029bjfSOHH%=>DAu oC7is6@mU3dF_(z{wFsjB&K3SW^}qjG)f(93gaMxV;9oEQ2dQQ^ZvX%Q delta 40970 zcmV(~K+nIx;{%c10~b(B0|YGq000O8001EXU_W9$Jv#sZ!jTs;e|%L~6hQm$A}J}- zEsfG4ol+t#(jC$%-6dTDN;e7$(!I2FE=Vk$(%lVb`2NqiI+y2MoQv;bVVBuwXXc&v z{l&~I5h_ZuSm>na5C{ZI?yb~&2n6Bu@qvZ{{xUGR7zutob$qMs41qkUe|#W(zcH7G zKwd!Pq$ECgWbDqnf4ih_7_;9W+?XIDNd%?i*D+bEM3E-hYA38w_Z)pO=~LSK?&hzA z$EfI>`ESy7rJ!$UFUDCpEo*_!E-`&w(_(yBz)javHpy-@HdK1gDq<#T1xaG}DE0;w zEgZjHF>*ZrV1t|@(F2w8Ek}z-?e`rhHVq98G`BBN9|E3`e+r=p;^M|bh&QFAq@Eb% z;Njvv4G2_{lA?qBx8c7H|84kh!~a!78~-$N&ad`^i~N!j-^wV7;1u2xB*G|U$kLHZ zZgH_qg1l7XkR?y(FToOqi-~R1{+Mh#-Ki3ACkS-l(+yF*uCz{5nLwdG~b8)dTP75cOMjEjqlS}p(v$4Vc< z)<@;y)*s_jUy(6i46>?YC zkHf1bv@l5~78VI6d?}ewyvG^-v9HUoIYU1_iOMNL2Oa&^U>458#wSXCql%xRz<~6` z4M z;14r1fBxk&uP@)$WOzuWWo5}$j)--1(o|JdW3*Tz^YX0teXo)5>0<{6ZM9ekt84TF zi>!MG1}t(6Ev~MR`9GOap`%AS-QQ|^2-`8Sm&t&xtE=_8`@dZn9V}#mA-vH{Mx~&z z+uYv%oY$Glg#KGY==cP~>H1U-@+zqPMYIeaf0#?gutL3__Ro^c;ITsG7tG9L4GmYD zyS9>keh7j>vkW(Dc&hA}F7G%~2en#!)VD8^k_JF!k>g!Sp)I41=eN1;MB|%Okn8 ze~J3h=^7dug!`=zYO8;T`os$n2ndrPkee}wXUz|p31UK-sg8o&2oRKDgsZEN#P_^u zPy?SM`jjCzd_uwwlZrD>DyuYfZC(YFtM-YG6i8+lyu^=BtIUUF z_4P%-lKtBt$|)&9CLvMsIqst#hc5<+f4AYNPb__Ec!qYl?6%RqH=Mbfhq2szvC!!e zqiFG%`ygbF+vAKXUx|k^tb)c;k@exD3k#tbM8hvAH96h)pJebm;n&nSlWeLzL(B{h zb-6nB^bj^FQCo7`$o>A^r!tC@{_8jOb~CR0S~qNT4Eb`S_V;fSJ`W_bgLK4qf8LlZ z94;v>&FoAlAkqk&B(p8|4@54HadtpK%zW$E6GtV(PfmVVKW*E+JB@)#!94o+6KSy~ zduI5GV%o*RY@c{*cg?D~{a0;fqBm`2!%A*1W!8Va%W>PCU-#u0i9jO4tX(Q?w5K}s zc=4=_>fsXmo&3frM*ZX@bMLTQe+L*%;C4OpZs$=8)FZCV!4-AP`l~Imr0v?B^ z9>RO|tFfpIjZA+`a`2uGn{l;U*F1sn+Rc1k?TK5RsxVbiCwRC&c)%{6h&Jj z7mbEJp;&Rtjf-~&;XSLqe+!AwQV@gX8aU}+1F=FDRR0EJ>lRJ_>vxExjEDD_E;Q{S z9fAu!RmN?rM;_$d7Hn@^YZ@0TpwJLVfMi&$;)f$kbkBq4hoIR^k=+vlX-WNg*78}3 zj+SooDjcu5Etln3p?zuf2~5aVqYS<@$@$5=0sz*as%dfnGl-PQe;>I4V3nChSQfo{ z)!(h}Ub26233c5mcUqMZzQ0`k)e}o47E8iyZnQBh2kS*E>s{lz$!_2Je zEatvXo}t8O%S*;)f6#;`-rbj2=+sR3oF`oo;Hso(HXVX|GMj|G{qWh#6r(4BkXqa6 zptBt~B4lP}X33HNFcs#5B6hosbF>^rwQ@e|(3AGs=EVS)eWM)-n_hkR!*yAmii!$y zRM*Ug9~M^n(SBpuD%s+t%_cN9PVC-tWW&M6rhV~t2!6QXe}A8>)#^7CptwIyg_ya? zaMXO@i(_6zVl(+$!p@GhvInyTvR!41wt z1?ST6Gk#1IM0{z)7m`VMabX?Q#F(05_R-x^_3{bAznh;lEhv$TYHVIe`7A3Z^4rIUp*iydqp9wUj^obkevKgpVqSiSN$*+c@e|s32|~G)lw=pY1q;c zZ<4S$(jS8z`8;gYGN_bUMIB2fAED9xpnIijpxob|b{%Gj`J$q{Ps{ogS{gTdJ;j&U z-%o<) z(0jX4z-Y>4xZLD8778t$To7IA$jLK8R_Kq7U_ZHSVQJO(Vi#FnLGzV`U z3EJ3rH0KOkzS_tJ*hQwIsTnposd4Ee+H`-tQ-870h(dvXad+_G@b&BXYFi-URI@i< zfpT^%g;1?T0!zy4V(f&kmXwgtwh*ufI}zv3BZ{$Zg=&J^4%_HiU}xh-@vji}VgKzY(Sk9x|7emOexJs)UK@l9%3AYd8BIafR1m zA)4y`FzoUu&Oh3be~#PEj=8e(-3?YG^UIeVk6|uUrV>|F#Im-we&6G^Hxqn!bCLIJ z>oTQ3iAD1O!5cpL&sUU6ue4K%f3QM}$8JxEFp5TK7Q1o&Z(%4Ub{GhEsO|QiK4F?5 zrO%=2eL5!IBec2M&Ww+2#HVs6|Da5ydwO=}UVzo~_b)m`Grhul0LC6rkgBRDT(aoe zp{kXFQ8f*SRyVguC!0gDbR%;lF%;1D8~H=*bfCUb?t>Brqe6@DL25*zKCXo1BAT zk98Cd0P%u}jZMLO&|IDc^JFPlIrez}4mFho6NN>)lKf;n#y*iy9qW$~M^4eZz){P6 zHD+bDy;O3~vK>-v@~Qbke_@8i2H2h1bbt{@v?b4EVW>R^vE%X|<3|9`?+u}mQb;Kr+*j1pUlL76ogR3#6%$Lf9|>o0Z|ClVUUE_ z`TqQb#p3R+t$X$lQB)Hyxx*_O4pP?34l0A5d3S$kVG`Sgg5B8l=`TjvGDa$lkRV?j zg1k{QF0S7k#Q*&A=cqh#9oj#fx`__70r%;j|pl9Jp&Ep|&7;vw|0`>Bm}CIOjA_vOH6L&%Ei0rqgK-1Okr z@nyi?pt!OU#s7#1wlb1Sp#^n8NMdV-GKG&?O1qwsfA}zddTM1j;a%;S(D%LFhyzGT z+$bv!;%F6he`Es(#^Fq%C;?|eV2TQ!h^R`!jd*ud@R1|){_O01?lTK*e?k5HDGfI_ znT+XT!G+Du2rBPXSaFv1x$^l2W3Uiu49F7Dj29>l0Uj+fug#?5dkYesG!3p){%AE> z3{zwR^q?!s;u*GKJJdCsv|K8kj}P<63$6b8K#V}qf2HD4p=|DAF5~zUd!(zy^~6nA zp$cbcNJ!uY1R`=#YI?0_;M}R^F&Y1Sm1D()i9#WhkFAb8w=Y~jf5q<$eL6A0onuAH zLMU(jMVEz85zEq8YbzmXmADZQ!x2d-*rHC8EKZ zxx!p=WneOjy(Qf6Zd>#6yskAy*>_vb<9ym;`N#uEsg2NEA;PZqU@FNmn&7Ne$A2c> z(NG>)ElI{avJ4m;p|0`a{F~hTyq#&i_w~0Ge@qmTv;yoZ2LbIUc&?RG%8&+AWTc0% zDq?AAR_t3kq^jv44`CF2xK~;cD^QUl2+Xw44%_$G0^qYi2QQR`mNLzx0I}oudSr}M z!rM+!c-}KI?*%#b>R>82TF4tFX6BBCW^Ywp-Kgkj1>hrB0#Pa3K}nI`oWDNRmfnl= zf4`+|x!e1nV)eT}$u{i>#o|-P(pRUwJT273@fx-%d;Q&4cs@G%SI}z!3Q7I_Tysu6 z6IdULii?56+ZsQBmPTJ@d7tl!(>)LE$}@#!i7+uSZE`aWZ=9HH496Mqq^auZ1+A)_9_Bc>OO+agUxU!T87 z{K7+U>BKX4M{s6#7NiV^@Aav1e`}VD7wngE=vWQRjw`j|(^~LU^%kNQ%esxjsPoWS zdfeVg4OYYHUa7t}Fo?J;22IxE16Kw+pbrswto#k%c+q-P99!N7ym%vEn8r8UpJxhCg8ENURDReOfhD4IOc{iA=XX)}KsN3)o z-MSSpkF7fL-Lq7QWZeDL!Pq&f!+Y?ego`9{98KL z=21(9JMtKrT&8!{+%)M!fE>}ZlhhC?y~Wtm=OQe%pwPVFeWh==Mr09#gZ~WRcQ@ai zot?j=3tcY!OSIy@`$<7zSARA+@oP~?Xv1hA>27n=Qbt>wq`$wve|zrKFW+mwVdD_M z?wRNAk7R=+A8KY?)` ztZ(0{%6+a@5MU%R{r#N=2&PY6j5bNVgef1HdF6;7Us`VB`;fVFt9Q!K`uL-rtYj&S zO;sq!nkI@J_d*6xU70cU)sm$VBlgkzKLcHZdS!ctcAEMif1E~NQD=7pp;Q(+KCW~; z-Bf*>@cH=oQ*})ZwU7{5WZu%hzC;ev7^YXR-Z(iOc{F|9q0!H|5Ou4X97M<|vSuhM zGRO8^!JiNsNvt`vvFY#QKz%NeDeS-jXTvn(Abwwh$B_6GT6%G{;%KMMKnnU-fr;L* z++Srs2%Ozmf1-5lO;;h#*7*m>Qu$)CC6jo1dV1WQ!HsKlme0K6)6=7zH-?Ru|Ab7n z_zF?_T`nK_k>Ht^!?d+|jo_tsc;|CU7Nei%b@7AuV)@30J)3nC6N;~X>=1%-tYiVo zi-s_2{;)&^IBtnEY-b~v;1xnPy-16!E3)0W29lEve?}Q^SPCbi_;ra);r`}Iymrg& za39+fYR=2OnTv7pyTgT|xyTl6D{z{UPvev2q!)bdn?BnK(pSXNLwFH$f9^^^Iv{>~zG=A0ct;IiN3 zK0`-GKdj2W!zAOg2Sgtai4vD(f4!Y&*!U z{YYYR$;6wcq{mImUOOcZ;%;u(k&y+!wnRAce>UQPgwY}v73Be`f4iEpNQkDPFMu5=r!cyc-<> z?^kF!f#Ckuzj+Z$E)ejI`2c`EGn;{M*vyQ;=B}-*Tdj9xl*o6R9{jnVCY%I&d!n{e zhy2h++WMRQ8#S9g|1EjL;QyHCOs(q8e|@LSBy|mqr;w?-GGm2{)S^JqQ6N`b=1X)h z7$E^42Q+ejtZ|`Z$RD;Ll2OY&CuDCSfT*ayHsy-)P}8k6n=m6Xe%yhPO3W!QM+)Gt z(Ipj%E5tZxhI4ER^o{P^DXvL>nF>>qlH!YR+}9D?wwZo>!P8f3e+TS%`{`DVeo@1MV-2dXZEslGW{2S4zkfesJJhqFsIm=sqgV$eb$(IX+x-%-r$|A) z1I03)S5vh}^l50aag^fP+VuA)VupW?*NR6bCL&arKC3W=BDF5b!+oYE%Ie zjVnY%3C1O9ev&y`_PN%qF4Pp{4$B@`?ILqzH9F?phm)6XMyHkK1CmfBWvO@+DRK z7mP)0>3j}K6k;BS9Blf>CLN(-rdXMud0b^1lvz*&<6jJC2*l~yYX_F-q8h;F4{-8+ zTB2<4INYwuMf^3T62zTf+HX8K0<}E)eU-_bhK2^gQHA{R00Lw-4bAh;4_K!W-QwVB zF2_j3W~ec5`jMvp*+#k6e_*0Mewq|8_sYT*PL)xdYmPIdxbqwaK1W1oD3Xc#m}zDa z>SKS)Fg>bF9{W*YA;e%I955paw(PgE##n~yrZY4WTj}p&B8)NY-vet)Hlo>jjg=wl zi}Ig6&^L)ppk}iluBcSiUtbmo^Wl};SMiHYhY-KL8YzrMM$Ftqe{tD*&)ss_X+i0O#^loFQ3n$XOCW3`1O|@*&c1V1brT8%Bv@*+^r2WseoRHR z_{7Apw{+35v2NoNtO3)5gaN7n`?4a8c0B*VbdiItwr~Cah%-VFNHDnu&ABmA5S6lo zLywPNUHTaGR#FkMXLU7sMlCs-Egi3&0L$UmN>!nSpR0oA>51G6E0bX%41sM(%gb|T z3@fB4FkIe=l8Zp8s>~cKbB|8i9id~sAhFSEE zvt(4g8Jn`O@$nTU&oTsk@|e+(aaLT$a0d-t$Z`wk8^`ZZL7of&%58cenEP9~%fAOREDk394efXeonD*U8zZ@qs zJp3&L)DFmSkjdfUNNu7nq$A8_w-`V!e>>OGO((LM{M|TAPeYRz$T4Ci?&?}qZa%bK z-BW8j&Aa4UQj+X+bxiX0>xcZD9FI5sGE!2$6@8iA^9|07P7im?PDd-LeQfBsql&?S zN*!)6f02uv#zry-aAJ9*Z?nac+4S2?Z+RoNuDU03l zaB*`VjjJgyPiPv3EFSb!n1)?v%+Ak;3=XnP*`7arDKAXf(c()5m__hkL%Y#4dU|fv z_kXTxq~L#DDNu1JWq3$4ck@PwKYsj}TU(0)e?iO)Ew_QQLD~Qw6^q6->al?Y-*dm; zp0KH~(d?)fzF6+KS#sOR`~Dp>40|uA6xH$JuGpA|^s&G-finAZ0u#$VpNWmdCKve2 z#kTh1`OtLL#?fvS%ygkinuFNP@;^sEZWWgJ=Ld^1d<&O2S}f)$f{Gd37D{hat5ppj zf9`J_R%_sXaX?A%+Bf{bX4aTX6+F+Uxw@E_&c3`I~xy9~*miY-OeOf9}i~ zCy(8xU+Y`CpzZCe)%Vymsza$&5a)H4lwFe%e&d|6050yp*EWCQ4I!(~cPle4$I=VvfA2AGC{5XrpYEkvWFbHkUVKsI7v#Nn_e=*lyyfIOO z2Nx1>S+QVhxdb!*%+ysCpjITu@Kc6lrfkhqhETpHCj;2gYxo0R}UR#<9r}JXnu-A+Fa(raJ_3vOd&7=Fk-HHMi%MUuu6!JGIsQeMc}Y zo!#VM;iBBd>MrG=&H{!of9L;@Xv$6;Ll##^5t-oU6;-+e(5zvYo|Hsdb?MBD;kQ4~ zXK^Y92C*dEubkHU>Hn>*wVPGueE*JiR3Ev%K7fW|3ZzFz%Z4mfbCiJddR{$05nD3G zf)V4JOkAK#C6Zz;H}gxF*w`dwWrfJGQO#jWi`RYPjCr_MLk@nie~kw%6h|JIk$E|# zrPwn0xB%|~d5#q-Q=K;p3O3_f>PMbSsKw!xd#e_Hg~*@HY++~0Zx*OC&6 zrh6~R8B^Zb!}P?rg z5mAIPj+!)vhIVz^BB8(vNZ81_g-=CrncMm&^%&(OwGjk|7 z>%hhQ!aEw2xlZ%-7*JqoCT7~*z8AhIvavkTzv7O2<7xSQuX*VDK0EmC$O#{|CFVop z2EM)C4pC(@YzcTv$FqB)$iUs-UnFy;#}{e##oeW*f6bPcmNE`ub|*%x2@rgh~KRMfb=-?4;NSdP-L z=v1TW*77U?olAb^>$Yo2d{$#IyxY_NtuA`0!3~3IQx5t)m-+A0Vq5tT(l3{9G*whm zuAANK5d+4oWSyL@?=LFqJ)D+Wyo1}(qPqTWe^fkfW{aNQ-u@Bov4Bx}5cK_Kxybn_ zckeow!dV70T_xGf?-FNx-g|W-TW+m>{Ux%?#M06Ot8URxEN>KzJT@A4(Gc$X`Asn2 zH^#!HP7}`F%ILRr3EQT8q=mz>u>)&wjE$E^7|(l$_ZK@~z6v(~tX-KSv>B4eBx%Tj zf84nmaiPmXm|l+H)HeR~z@@$%GhTex4fXLn-TR=Kh=PLRa@f5kIQ!vtW2)K;FET3X zsF(J_^yZxJ=y;&@&-tFN>=`+G)1h7Gs*{V03no9mtSUZuE=7aO6d9z&66(k={PBEo zA5cY6(Nvv-VK%9y;HV`&0K>F`-2D7Kf5C5#pXeT5nQ$VbbbV%V#wKLZ5;l9dil@B+ zHa>?*a5yXG&yM=|w--UQvoOm0Kc9JnyYT4{bKbH~D`JMA#qyv0l{0gcdwQ3H_ z67g7xU_9)^Q_lJI>#2M)!+9wmY0S@`Oo0<=!G^ahRF1QD?$Mhq33ST~-thKSf2*#Y zvesB-|Ld{G%}8gZ%24|Xt!LO_xkaylPBp3MdWF zpwjgC+fu>^sgY6Kpd#y$vrbAJe>uV^0y{g_dY3I>9z{tjD{2-(l8Tg2Erl?ao%&Do zp$m&Q_{b?@{}BpTwbM`pdbNFr}>GkPjIA>Zois`NZ8v?e6TmX-%ii5!4hB zZ{C$7emc1~FP;UI?l=nGC|N{znT+sY0oel3og-D@*P$|xt8z)efA>)m`=R>gW~`pL z?BJf&V1SmSF(4P-NJ~$o^k`Ps)j{h05Jk)M8a{Jdq?J1wi&)z*2ZR%3iKBknm43W) zaSi}1x26X9V*X;p^(7u+W_L`Ho9R4sV$};)x46KYC%)KpMesqL0PE(qyL;Z__NXkb zz*6n+N*8n4z{w_bf5GBQIw(^B*0~pvbP(W;xVWANAK3EhZqF=SP#*!a!}FR#`HnJ1 zX6fQ)bbe3z<44v{pI!mt0g39g{6~9twvH@{Xfdd$Xq5dOPz;to4!wlpGc%*!(qZP0 zBB`tAFrfpm*maYg5m?IyepS`LAS5Hh*2T4^!;I^X9rN3mfBI}bhvo2nxOZKRBY(6b zKbrJtPSIWQHMEqLc>t@b;b7AbMer#-J39`uCboaf71>9mgAe&TUajqBeFYabFdI8N zX(c7>yiulT8Pn{RlL{c^$DJ+@{qPCaa`N*jc+*)Y&Yq)PR@lI?=e%~_4Q97KV^ro{ zPE=-Vx8#vXfBJ||NN7CyTX1)N7OkufYvs^K@wCMN<<(s(8lKf zoSrqoGL9>Hhll0vc0Up1G&hq72w=ZWz^%{{wmZ2N4}Ke1?0dBA_>3VjG9zOSL3W&x z&%&wkWC9&N0hWPUf9t1@+nDyyQNR|yH|NF+O`gBj zCtU)uAsWT6C+OJao`cTW^y(qW10j9(pB^^z;~ux^5fi&g?^FWLi%S@S!5M@Gc zdZs=mh*c!lp@}p@BJJL)qGAt+X*cHQ4vEhz6uspZFNnXQ=*6G(#s~NIz1+LtN6akv zUS$D$ql({j++TKdKI^bFEEHx?vtQ9Oxsqyee=lUqixruNNxBKpxfLk1a?4SZ+ZCe4 zj@v666k+#Q{BFC?*bIb&<3z8~#7$-<_UC7f>k1zNQr?Q)Oh2>{wO$fBeQ+OhWwFX5c*#5`hyxjS3W*yZRCt5083g0sJ!`-QPUk z`5LWCl5m+){0DEu@#R@@LXLm?5ZM`se;5`dYu{b2WXm|9C?Nh|nc`Jw#N$GkXU;9BI9=6GLvN3iRje~X^) z&qu2l?{B@-&C@8|5nMU)u-4Hzt}v?yRQmAzQc}027`}783-}lEwfhiu zVa|UI#Tq3~Ahz>Rtg_}w{!!^2Mvyb4F?tsp6vd#ffe8gkIk|Fo!MB6RL>ReylP=Csox0kW5$e_ZCpQgEK0Z?-@tn-XdL zLPW8Vm~RuJ>E)A_+X9;bHcP3hK8=>a!zQ1KnwzWiEBh9xgttCbVG5wNr^03Hebdc; zqsP@+GULarP4R+1&wuh2x%h61KeVdJB>>cX?;F;qy5GFd6W{e+Zv5}x=-b;T)ipJF zMMdVlY3D)6QHL`&WhN6vf3GcV=cWtUMeyH$NldhK0ur)Ni6Sho(`kRsYJ7aKZELI$ zrKt=C$BHXNOig9u;WPX^@C{3G%&R4U!Q5DjMdQ+EVZ63DSLGRRIu`h;REVKbjMZ*r zqGD)XfI+D&MArvNX2$61`mwpcA5`$Ge3k+&Bq+Qqk0W&u7?Hypf8X9gmv&2qT?i4{ zWx4APnO4<$?)YNFgmrKMLZ$5+KR*KDEW%C`(hn0D>JwkG4UzO)1qnF$>R#+^5FutV z48r{HorUjC$1abPTccffCYR^k7rUA-7KEm|@|-y4B%GRU#I{->j$a%34g9VidLjwx zmjC|1U}9mB0p2KMe^>#aFF(1YJC=5dh7`?mmJLc)u{{Sd(8FN#sN~p;yKc;L`+;yn zcEFbs5_vZz-@p5p)bf?V5tEs{+t_dV*dl+k@cq`%-9DO`5rCAQ!!MG)eZ#7n4zlD4 z3Jt|kdH=WdfNZn*`)`ElXU+zM_IO)ZQ~CdfeTg#o)ylW?OWe)bItSU0A3s_js&kE$VY#WR z=S|!>L=y$jz@o!@TM}Rgje?Na!@Y~PUPHc-U)H1Bv_j5)* zqGEu@TlIUpSB`of$PFC0&4=ENYaF!tiwtK91qTInCO6~)^_?=bo^`(;?@-F6Lgn>d zm@;_rV5deSXuOISJiLs_W%8d!K36`=-+@k6O-CU^OFK*|XvneK&8ujiKcywcY%{Ow zFyg>0e;ynw_;!K0u){83r#Qyr;$GJQNTUAp?*gkYbj*O8ZXP+6nnIw`zt& zh-}TN)>bTF3Z4Ks&|@P?VmFLc7QI5!*4DPPwti!8Z{KQD{6Dt>MB_p_j8haCejBzr zb>`0j&_@pI;Izy^4VJK-u0)=g7&ET&tZ?1Qf2BSUOD*`zbm4nbYt$XB&F{L61W4rg z1aB-jq2|2UI1_|W?IEVi1oB1arTuTkecjq7fg=4?UfX)aM_z&C_4bqcELz3&1Ovo= zK_0v8@Nf=0BJw{HlhM?oletSy(ulskCX)luLQ3XtvOjml?_w#oKBUWGDRv!*=&hoo ze-r4up_Kpox5?bV*u+|3!7r7s{7(WQbT3C;7Jw-8^d!*HVUtu>Pf;-cX>CH~3LGBL z#DMC75K&j>cHEzP`Qd{?*y18qb{W~IB@#-oaW4sCT7iV>idCF6jj@^8R1T9R1;hk0ySrCuDcA0|lyM9+UU2@{kT<-3E_#`RulSN~>SHKykP3tl@|yU3MJAOJ`nc#tRqF^kN)+`0LLde~9eDT<`l> zttE5g=efzTfjM$w9Y|tLcihMivUV(2GS>+nEJ$f}cOJwXzYn>NR&D@@t*_eWj(axWm5Fg3M3 zNKT6%2@oOJ(od{5w)CYV;(}lbV(C`wCG2(i}!?v57+Rc zU&_Adi&w+$42^Gjlw3saiX0K;YTHg*x>t{0P{x@qw&?#XYyCJk!2h+jR>^i@j$`+v zOwX!okaOQ>%WWgpCfqtL(nC0FyPvq!LwLb?ueX$JaIOL4#lZ8Xe|u~u9i&`G;!69@ z!PKh5NnMBB0&^6N5}jZ?M$z{6;2ZEJTVA350p`bZr0#ACAS|R5hdOQ91r2;R_4PAB zZjm@qLHo?!s82*!`)-aE!alPgYS=J*APDbTaTF}+?W-{9ikyH1=-M~9_QHF`3!k3U zl=))qz{`3buPJG0emcxRnZ>dz~Qg{=vb|)3y@k>tJ>Y_V$E-jBzkEC@9+9 zE(M{v(k@KsfcK*c3h*qgtR!5|Fj=(UV<+}~HsS~$NM>KTe>^hPYPux+prH|`>sk65 zOl@zk1W@?lRTJ&Dq$WOcGP_~Ge3l=%#3!l7u7s3Y_v_Q8f#Y)ndC^CfD`;vWFKfAe z6-y}+*?7?6J%!QmvnaB=TV^m*$o&J%d;5b)d_=BHpYnR|8d-K|zHA%~BO~K$CVM(A zZf=mn?|69kf3rV%M~_Vr^U!kHvAq>|UdieSbiVi}gwvWP=;q{Z3h>?6)fAsYRZEv;?d2MY1EySm{x0hH= zF%x4TTypI3&ZtW7A~)4sbU9(yH)cEE0)w?R0$TPm2fNxIKkP=lcdkc#S7}pg%{hp1 z<77eI#M^$-R7XY$cIsugu;fWdYaxvIY+RugHu102igdEfAJfmDZ#YXt@ZSyz7K$Ji z(^be-f2O-ZcmKCy1V~Y&G5d2)9kyhQi@8KCMxWJgMj7FL->%5z%vPsWnb=QMAs+`c z@^Y8O%FX(2vnpdmM;gkb`FXG$8|6p5=WHrTJTV>OoyHpbL@47rHcoQO%b6zrElf|x z0;S?14Az~;DA8M9o;xz~-N?cMx;}i_@u!OVe@8IXe{ZiWd7@WVtb7TzNFTqNG++cd z_Ntu``SkCFQuaMD0Egfn#M42Ay!4cwu^18W*yt-!gM$%_*pcy#i<_LHh#^H=UT;{D zWdwU(yAcF{AU^$qT5wX%j+eFb*SB;TGsN_yF@H=d+Kf0JDcX|fk9Ca%{Po*}j9Te_ zf7@D|x@pX1dIvLSNDY^vE!Wu_;UO@ue?lf=&yv(gBFoGy>4mY&&?QVsu+q}v0p4q9 zxH|L_DW4|i_ddrzK1P2rzy)n=T>A7ezql9=R3QPu9qLeno_)O@V%eH#mRDUkefkum zN!-~epuGTqiJv`$LHHvXvpk*v`PLJQMLzZI8;0ZdS9iSu4rlYb@L@xsh;cIwe|3eL z);&q*(j8jRfuNR_9{ZA+8N9z=nuXiJ@TlyeP!v{H%`fTce5KxwW8))@ZaWt@*RII) zHa&fc@Bj7z%n2ev_ z{pUFQLbB{p%g$=6wO@;66hC?9YS6+h{}FF~O)7d4ab)*Ay4xz4w+0V)fAn8ElzAEt z7k9Lr^MB5A8QhthV{RPG9ia5?>XWR@$~rR}~(XJzU4(sl0+_ z<&C1532;t*`0KqM|FcN2e^l+AnReFS^dY?Y-dJ*h(Dn6pcXyg2k{B6~cO@LD3barq zxU0|UsGC$n>iw&s1`E+A2(Or);_2$q!lD$4aWYAO(}^u_03W^4xfR{T3b`j>2J<^U#+~ zSNmhkHK3UHIWDfEq9n?-|6MZC*_m6t9!hl`^XBe{rHf*%+QC5iFLE+l*U^HvsF@jE+VbN(jxk9#0VqXy@5}i|*ErMA zpCvf>lnH?#>OI~C z`2X^Yy#HM38{MQghUs}&ZS`sY9Hz6D^Tp+~HP6Q#CXcQ2X&)5*eH>d#! zxrl86BM3(SZT^4Q90*}AAPyW3>Y)M)0Fjeck}7>;6!gCUP)h>@EdT%j2mk;8Apl(` z#U@M@008KO001D9fM6SwujvSryE`9$jcM3Dw#@sbooL|LaoT{Ls3XVeOb`S#Rg#HN zq{53Y)0jcett1|7o2}n;9z5*bL@^q59tgsJ!>;hm z9-&9A{%sJ94s9n6!dAx=XIo#OaS+D% zd#e+t6WydHS+2Q+E$Ix=5kG;ToFObjEn=!#Y zKp}92TIub3(T(q1gn=IzT>=ZS}$ICLG`%Xo}-Se!rT9SJXgeK(G{=+CHq zv^v&2$ZXfe9?}kaM+7|}tJ^b| zBk1ZB%%;^j@-I;6#=;;BM#3oQBQM9tyZOmAa*`O;z890V9}pVjsL|a=U<9FWdyQ`H z+96y71WWe~#KvHE-4I=Wy6EbOZ=ZWJHdyQ{0KcYM>U~JuFht(AV-FDv7BEIAKEeIA zI?nTY#$r$abQEIr$P83LBi>X1>Cy`t`1OayFg!&O%A15C^!sBupyMQ0Es~Y zJ4b8+?I;NmcFe~K?rSh$fjtXGE@-Ss)f^Zwa!|HYG*ZR2fD=O=%mJqUfe-Z3%|l)=kqAEz{T%ETe6i zqNtme4g-T~LX)KFA`F|dq_qvWThrl(BFX@#N|FL22V86$hG8nYA?ymWB0@XvQk7Kr z(BY>h0ya&9Ynlb!>nb$p7Mok_Q-@w~RWwUUwdCUPQvg4Ijk>`V3WtdcI%ET+!GD`+ zt4GiS2<#H2E`J)@WAwYg?ZdF-2fpBD?UgqlxRDbAu}zS4N5}yd+rT6GHHdyXXza|7 z8f4V(hQSzxv5TT9GPo?zbON$~mT=_v(KSv6GKf-$3&!>k465IAhY$cjz@^Z&gOOz< z%=hLATAs*2K1*Ol;e>ZTpLD+t$RkZ9AFRHg4{{>wowkdacu^R(Ds| zOZVAl?_I}+8Q=k|;aY+$qGj~f+R>~nF=GA0YMEMKh#9+}#9k+bt1dz8xa4uQw-+$2 zfgutGEYZsfTwO;b4C1eKyQ0_D21a%otM^#7@F4(N`_6WqxsMko`t5Ag>Ei+SR3n2A z-h(sVvQBIR)e1)XS*=34r87DXOS{En)t1WpVVzT#4o5U+6|QY#2`lR7;|n$@rjW9h z7e-;m(bb?(1|)z|#-!i0HT^H&!{#iww`j-|$B5`)luARH*kI+JR_x-1162~?EK z1aZK8Fm0c*z-)vuYbnOD`8pK|HOJVUV8UURv57?jHd67CFeOa5qEkVm%0Vk^Th~EW zO@;9TUf#ViMQ6RyF;TJlO7tsXr{&~w-P}0)X%cJ}dPT*e`J^t(v(!o;XqqgKrO3fM zm-NN7U=6}7MO%)ev@fFWQt;_`z#*E{Ko#I$l_s$w!xDJaf;T7=k_jd%UOz#~tzeIm zkkX)}(0~W3E73X{l%AW)~PfUldu7Y zd!)B`?YTTa_rFsVH@=DyMG z7Vc>BnAm^sWwhB>CkJ^&h#C!7JY@%)5jWU2jO&vmA8C;U>=HuMXQ{Mebzujc2>te~ zj?%4^grMwNPugB0!aE=v64~~u-v$FDu6Hn(@i zFK+!kM0DhwpAPvLTHVqf=BufGBR3} zUD?!JZdigiZ1}XYQ(cCg*=T9vDoooWZ1;v6}?6=sp^9MIqq!zEB(7;U`{`^KJRMz0)V+?{351A3u$`^hf;zLt6Lo z`}hj%&;%P8*XV>^b|L`mP5g<>g3;VefZ)=FOVI8zDor2({`(c*PCR5?|0ZUB9v%M= zUi)k#o$+~oyvj95px~*TOAs`}zL^ihR{qb$!Y3r+E)aF-?r39Z&A&MY>(q5mifw(J znOU%O-B|U{L;$E6UW_=ANu95 z6wacD>;p!>uY3^zdBG@=En3Rd7$a7|wOr!9;=lX8yodM9()^9w)LW~k!y5EKe&Of- z7^Y|hem9N#?LQGuAoP$6&1{XF#j~Dg0aD2X>+R(eAr;{%lvF7+FjCrM49L=hx6IB7 z1O~s(iUGrP+8Nt)7Jl%=$p%ri%1{Z^ONcH4>qF0`J%1JeL9Spwq+9be2Gn4vjuK5j z>VfzKPy*9sD#;YOg#P^WuU*!{F!f;-DMzSLVCdc9Djrf$jDny#b_yN&?W67AUM8U* zsB3DZiJpe7?(|L8%r&tk_m4gsZKH_ZT2EAls?4i_n3JTx%bI&+7$hdSVnj0*rPAz<2+X&%vI3YG{_-X{u0K@toaj^h^R$iX}KDI*Cp+ zA((c3iGGP%R$7S>_k@p4sIqI3%)IF>D~|JX%~qJ^sP}GZA8S)WCWz)pR$U!@*ZFJ* zd{>j;wdq=&;$Pa~Z8ke<=F$a%w6VaE`Q}{^0sYPEz&VaNH{nb!knt_n9A|lD0 zE;2;C(G0`g)Opl}CJAaqM4^Z0JcsQmyhai#*QJ#cM{AV7^Lkvk94m@IJX~MW?qAC8 zE#t|j#JPArSwn78N|oIs#86!zoc;8E=L$UzIL`DG;H-mSppenrPa4K96$g(hG6}5! zNiP)(^3t%#Q3Y&7VXz=lY+n?Iwo1fcQ!#Lx=tv zbs`v)Ruz;9bND2!y`Nn4B_fm;#Jk2Tj9b2to+vk(Ze=nmH*ShCH}f)J*f8xd<(DCg z?cq7)3zrVIa%IcNs42YeQBqmB)^qJ@xDpP)@!or3n?M$c84clLZdK^vsJJ5UNL50P zUNETnbT_&(W2QMaNYHrFij3v!j*?xjbr6cylL|&{I$p(R%e+2*(Htz{MqHhni|QsM zl?3L3FU_FKGrQit(0;hh&PTlB?8u9kBsu4ta^2A^YN-hqVq@Tr%1L2_f^2OT6TuF! ztXYvs^~5FHI--$x`I5GgQS&dEqFX=#HCZfK8EYwMK-LmBWpxclOO9I8ZQ zcuJn60&JNQDHX2eQ@XinZge+1PKycY$phD6nB<0=bHrB!*mz8v)-WKBMN>UBFr@;a zuh)B&iX{c<_~T3S8GO$$r$#oB8{4d>d(q`Mq62(8w*mx%&U|ZC;@Fbs$bMG90dD`_ zy+99EO!`Z5M0i_!3Zq}#Fdv1y^<6A1_OTX;m*<&^Py z7v!=~B+t_QnE3~Edmzo10%1tQN(kd}wmW_j zDnSrp=_9Wi^{*5q+hERu7NSf8e(#xnh^Jm8mN)-(vJdQaD6H~I9r;70i!`3W;+?gNk1vwA+pO&eZ7tIB0(7<74VCZ$ zbY#Y>%Qe0DCW=(lVgeY|lP$;b1^N*m>p}hLevxTw05p$tswji4f!ah?(3BN43tMC} zAt-b-+S(eKWbq6!Rx%nYuc=f4*(Aysg3x=}WWMC5XeM?g+Ma+Da`@0pu}#~Cc@r{@ zU#D%*VcIRd<85sl5fX@40;>xWWf_s2SI6oNE{`}*AdQ5nk^{W9$$vM?8@Y~GiDF8( zAUPksxxQ&D+=Zz>i3-K^Kfc!tY!GNP(!P_x%H7mA2Q{mxa(Rs3XjI;RMQyxQEOB@Y zS@OscL3q(Vv5`5fu7?RLpsWo<_z)dvgBFVLEBnWNaN}p;UDL1Am-?=BI2vKNzlaM= zVOY(`P0u)}x&WXGbRj>9Zc;_OS=GUj5t8J1Nzw)Efo{WCfHBA$rgV38=EY>3So6ib z+K)4Y5xd>+Hm2XHxh`PjkP(9Whfn{JPfjuKSN!Y!XZ0`R?M{hH6aw<(STJBg7cq6e zF<10ZaawSp3385X7@?IcKSAeZiHCES^KpsN)Ia>ak^tZy3y~8MYW9w2Jnjm~-m7uC zjXe_WZC}4x+;*|+ckl+~$qO>QgB2BsDA4|eM*^gI&(UG+j4;~u+@Ya;pOG`T~>5EY=WLBtV9 zKhgs$51q3*hDIZ|5B5+>l*|j!U#%?UHT@irJ_gv`lN|<&%#Om_V({21CpwLZS9S$i zDJ?}Gf1fw;G%iQtsai3Rf0|M`txG92tGF5;5u7et zPZGN1kEeP;xe@!aUg;zW8HpaOWfi1dy)`rT>PgTgXpkhX=M6OCAdWXa5@jcoLUqYO zEC<+AMd#(%JJ;C5DrkwapDAY}t7S)2_@b#omDA0g2^MIOH;xyLx(P)z$DxD-Y20av zdXHzb2MuNLncZ4IWlI$>JsQ-Q>Hpd+pZ8GkT}KN1E2#YkUCNWnkE;%TY#;Niu~70n z?$%>yvMVKyWrVktQT%wWln$#*1#={C{S$E8NqQc8+-w2BskVPta-V}kU`S?Mo;^eO zKwQ9lk^FFYFZ@`#kXwM&1z@?sj#Wo;Ycc$xJK`EOzd;&0B;}LgNb#rxrfx9C;IE!c zFql>DEmb==`bz)6D-|f<41> zlV2J-`ttOW z7j4EXyO%d;CST3ODKcN2BJU7Xm;oX}RtNK4(n~#sSW>ilM#+LY0+j!F$evbW0y(Fb zf=2z-#QPKCF9_J=s8`0dR}8JHv`gpeO0H;FZscpS{Yh6fD*G**J&j;6EPN%Iye*tR zjZ7pYgMow)?(9iMDx%5{ocUE$EJaGYNR&gDZI(a4Qo8+?PpY z3!!EeMb?F%jq4I|?fT+_2*6jWimEE>f$_rVi$R!Uehi)a)Ti7*G}W~fUEy=5_OZIR z!6(y=Zu`ysn!9mLwj<~3C&efGQ@Oam|Ls4p1G-(2>|nStYBuStBB5mRs6jceU0J8O znx-+1L>kLX3$B>zx}*IQ2>`)X zzqf#k>TZr=IqSx~xZ(UflIfNG)#A>fkHCM2iD2S|y~=+CW=UHt@O_!j$Z%WuJA~;u zqe%H1kco>)#;2<3AE4JH69NjtCjfD`)&#fN0LO5I3}dsq2hYQ|<#E&V-;itq9FRl- zG$;@df^(veK3?MIDMX^aLNP$Tz}H8FgB0fOZpVl9wTkpR)~<%V&)gvg`qYG0SGE-2 zVzgDTurLsvCozp@PVvUrfLlM3AcjSbVryZQ2<^IqLtxl&l<2Q~JciHIzNBW+U&176 z^@6V#&aC;`76bxTRVxUZFbhNkp|_zzz&E}9j1MQR#-N1KXA|G)P-LR7qB#C{c}Uk{0Bs<{9HR|Y05p*fQYmR(9f6SSW} zN_*EM%F#5FjgS)}zHjB4M$EfJjyg$j;Wn>kG5bS0HuDir>VbHD0RSzGa-NO6qsRM6s0+z96U2cIF4CpZQR)S;01;gI#2 zj3JAvm=v*;;Yccckj!C6DQ|yrf2JsGE4i$@zFRZOTyFB2uyg z95n!(R_Nm$AU6AbG}BP6%5}I4yw(*MidXidd}I|^L)qLs(jvSY($oq_Rf2Xvd!+ZT zDn?a(X={Yi?5mA_3AdSXATM^HEq6Tq)A#b$nm_E$&eUtHW2m{MtBdXqabL_ZzZ8`n zDh{Z(i)|ih5<@uBkaur}N7Q%J2^6Dcd4&L|V>iHYd$sc9@OU%Kpt%JxXrMQc#IAAa z1W|gqcJk!l@+FxW{Ax1S87<&8zQsyYxsdxdXU3q~)C{c)*OItWb&A$PrB1`Qd+HB1 zntGH1q1JFlx}DQ#I8)3Lrr>gOX8J13_R`W2ajeV&S4BO~ek%ttEFCUsxq3C0+9w4_ z@^UNprUhGFVyze^>EsVK^S4Kv#-xhb5Y{9Ms2apIOcNwwH?G}9V=p?sE{_v%_^pYgL*iiIo(e$#d_>$szDAJ#Xf=gKi> zchn$t5L@o-hhf>q6?lFSTgFriod6KUn*19)Uj`YRL4c&TLA{DDiJNY&WTjes7Pm$d zSDjN!wCvvU|77|S6%F?D|41^1bCtm=E9-v)BY3CH&O-HrtKX$#O?jAQLhfvA4PIHg zKpbd6AHeyUbkzFN#;s^9hCI~8-Irl;#8$-Ln>!s?d-o%sLn1<^w&$N!fCKQkOPpf) za2CB$tA4NkGK0w4x&BpQZ5>;QWS}h?bfxR&mT=0|v+}8n*6c!_Ug6>V+}`1}wR5BU zd?W02Je)Z3A=skglHb8<1dm(CLEB6*I1E#>k^=F1d^Ru z4LePpSaAU%iLl-iM0mY}uJL*GF>6z#R4P@bp4HkHq$!oF6HSqn#DQx{D7~=U16QDm zRt>VON~Z1KDUiXkA%$o{5hwTSS2Mt1U2c`V!OaU}*dj`+B1yUHP&D1QsGI`J<4e52 z#}~Z_*_SgY;y0(8T$%*=Q(2@KEJ`&vJ_YY(0|H&uM`(&E5he1!TA1;8bAA@+!P)%osH_p?m6w+K^8j(4 zzPQY(@X$P`0C>EIIQMzKc-(sI+0g3PX>GPH?r0Gm{lwx(>KG4v1SzE5h_mv!-dbVa z=t!5jd%Gg@=S#2lKI(cuT^BxcgDrd)x7`S7>rNfBYiq%`6n&@6@5x-@5m%e@3bFXh z)dpR>&uO77iLl(DmF_ahNLFvEUID|uzD~lWLF1Kp0k~={j!gv^yQ0u|nT^nL$wWp2 zgLFS$Mg@}O#vc)miFd$Cl92}+j9HtRUN6_~@=!vLC(K{!wGo`6BN*F)A|IeeciUe)&uOGWKn_Mo!MY`n&xjhuiI^81bz)Xd}cq3@( z(W(9-Yw7YT>oW7>y>n~G(91Cyp6Ot);L^@FEO1H(;AuB{;i1 z(WNXV&6HaAb-=Z3E7CqHFDbu!`dn)>%bL(i0`4=G$^MlmW|mIZR8DW%yVPs#okgV^ z-o1(E@NV2cj9Irqn=m=CB=rCBY!nWz&k^KuLQUWfu6IHi5204pC=le3%in5)KI9I* z`$+{;FKQPEdZ^`JerN}qr`AU19z8&{dAABiTRQOxWMlp1J3F_5UP4l##Y_CHbS1zB zIF0F1WYzyj&bnp8X8kfXmxFwhTFhRBUgB%`XOtfe^Dp7?dU$Q^!bS}w1@z|qg|Hq9 zU}F!r@|=+(2UQc5FuxFWUQ@84Qute0Un5oS&!NGN<%8E^)1BP1`{FXS>OhLk;fbP} zwB)<;dWAI)DVzvb;Qk^r>S4crsTc7k;4{JUk>xdG@!eBI?NTg6!vfn==QQn(w1}I_ zsXVkXjX(KE^L#dOMXAP9Y(t&hk|rjN7q@bdW3Y9eK7PK7bG+K(Wz|Cz*aORl)rsTH z;|>q#!QYJQle@8cxxHD~>*;NZ4bAQ4=>&kV>B8nD_Nm<*SN0*?ypwKv!>8*4;w(D= zc{w!}XOw5xs6XUt)F=GFTnPFQ{J`F8D99Iwm(o5t(3&m&J+?sn{9ZYXtR_5FZ0H95 zb-VPig8YqoXlK*rwO|=0A7XraAxFoLnEscAC}`#DW98RAw?H#{IE$C-*f0EiD7}J| zf!L7T{%HKMT`T#$SJrXuM011`; zSZDroM?^DY!E?;9yvGW`*2$Nh$}X&(*QsA`t_7{n;oU8>_v zJ{VgW^y)i0D9US^?n^ipuM(6c>PDZNZ6rYz`AHsTAR&hyx$wr(AuVC)Lws%8v;Ha-f~9ORt=5DfNCG(t+N2a(kFM3n^^(5BxzeKv;>E$Oa;iFdI!-Hw4K%)GWP znD3P8&@4OXiHY^`;J%EFOO&`Hq9Mf(NIX4 z#%KNx^JU(l>2A23-?+p6xpB0+57@_TQ0WD79(qc^bHlp;gV=;ms6y=3Ku~({ozGly z+3W;mszWXRL_XPafNM@eUR7!%Ru0D!LjunS@!~p}frKgJ*SI9DDvQ#pg=9Cv;DDTi zy8Vb4@TzZb{0CT6hTh!X=P>T39{hK@X^8Q3@9VayFpJWkW)2G!rDZhsk9NLUyN`4x zs0<{@BepUC7H{PYJAr&)gawwV7?)cI42C4_t2SZj@KTaG+oAg06-6xV+=!%YT$%!{ zFXLWO8(+#gyab&l<2%BbsCSHK3qL2ZI^q+%p2VH>UZcW-vrkS2d zaX11X$@7a&bDEn&>=oKZg7mNvaFw#tEx#_8kMKx1d>dqb$IlC!4T6AStl-62>T4gK zXPyR4Ie!=wefAaV2E1qN+VaKP@CaxrckCwnB|PJ5;^hEqgioWa(%Ja(?eu zK9Id_uclm9)~Uau8hw-MZ85>-nJ@8xTBHC7pwed57u&?BH_yW?+gn*ti+3o#PKn-^ zEmM89yUW`);^8i7mD%U3{+mt0lu0~;`IH15D1UaTP*hXRHzTZ&HFiH49yXy4LPEEe zYt?0})=^k)RBHQlo_Q4C%CUyVb~XB~!|rPNyrBDJ(S0@&DlS{8S%%Y$hl`JU-Iu%Jq&4ox;KwwL!P-K65S zOgXO>Eglz0sK9^fD5c|3m0S8;>r;*!V<&dTN9qIX+pW=v5QRWZID6*QkA|e<#CFl( zQ+wK;Dr)}uG6!>cttJO!nD1%oQZE4J3uAMoI|R#Hl||5%{wZEYf&aTy!RPStx}k)v@x7i_C~o?QC3f|<*3l6_o1FMfU6u7=^B1Sk zZp6iAHDLi~hs-O|8+%vZ;JBpi`}~bc<;cdxj{5}Wg{|p~ssd@QPos}2?Rprv>`4wYqaoD2O!}Pl2nj5^(1GYF`gbR@ zXK{D>0^&_2ST}YzLM$r1QG$PthX&MlQHW*vgi70N<%!Bu>)Z{+{#P~(uZ85N?K3A$ zpLq5S3moSFw;Vj4P-f~lM>Q*e9y8NDOU8xoQfR{PNMVrnA;x2MJef8#f_ovH<-Pr4 zD#oi=V~U9 zer5jj4@!{M-3b+2z87Ej(5LkPfMj3?fiHM*M~QIJ`tU$-38k~Y0nxlUQ~h|`+Iy!yx<-b6O8Lre z>+_86_?s|GKn$V`XYV97s)MP|8K4=2>n@3ATl#QKwnMw&YBoWx5S#URJ91G7;h%a<)uug!ut(S ztYW2#CNCDfggQ6P;ZCA$f(xhHG_XViNo^EMx8e0)iAY9lbdboZjSxj|EtScDS!~7e z73Yy@ldPipJa7;y>TqN^YO*J$DNmt2w;`o;k2RiG=ddgHv2`68IK_tP!y(Tl)KyI7Z3rn8t$X zq{R4E(5o^7wu+G>D=wp?O3N%{O$Kzes*%zCz>N`4L}!KdDg^!{;6@et$g(+$c@e9t#6$X3^;z^y}-8#**%th7tgv!bGRgup?CvBNbpH6(}W= zKp;5H=d8>BbF1)YPJki3k8mu<`FoWkWxt*^6v?Xo2gFE?GYWsmI@K5nz7 zm2QUNpAklY#Zur`zS|F@={8&we+u5^G*}gTxNGqpZDrVIp_Mzm0qw+IJaLZ|VieiT zZQ#W>XcBEF1oXOtWXRCTGxyQZ#8gitU4EkLTurrhbv}O#{q*!Q{d4*LX+QO^eOSVu z{yS>p0cGC>HJKrz+279FY_dBDyq1VgrR^||P?gRAGanhFa|Q)j7r$vwu~T+xHpazB z5Q}|wpU&xY94ZFsKL-e8M!%z3?5U^`Ujuh@o6Z!p^%9|`-ZRSgSYvC3(KV7xzXsS}r7d6RJ$Bh-|Q&9iT5lEn>XbN@5T+tX;lV1rG~C=2@32Zv&5Z2i zx6m2QHwFa#+>)%P@s3+ke#d0%`be4>!cVW$JjeXjtq5{kQ(_$O^f{8P?GbHlh3*Cz zDT3;dyZ}`k^ZDr|6{ZJvapF5>vx(3^+sFa{%r+lmzVJRa#cmxO`mxsJFj}(=BGSih zQj&wLVYEpR3wDzYJlD`2OfeC?8iC*so2II+wP4M?jng(s>peK|g`}^L{Yp@SQvkkK zeUjZnv)J>n+A%v27e!F0k=|S|4u2|4Ia-!)5LGsU8p0N(X1`v(>bred1Oz&5VxSXX z*2FHX?=NLgU_UA*yeChC5Ij&kD}Q8#7NNy0^~K!pL(szSE=5qNsaH&>iJ)GC-(f;Oe{=KL@Yfy8A8-mgOcriV3YM3! zJmern0V(YWna-`M8%!`r#5LC1P7_4HZL2EBJ`pfj7zyx%7B~`WxZUW_Ju|_RT5zzi zvRGI^7iQI=;U1B9X2XF8k$-{F1&1fj8}X9y`>B)V^_nm^xGNtM0pGzK{unTK7XV@aK&RjBYP{Y+2e=ar41 z9|e;au=oS?1O1Rw1x1^HXk%7V?~1}mhiF|W2Nz#V%L#hanJlH!i!AM1Q1@g^Bxs8@ z_K1P+BAv0pKVEi+HuY#SC-1B?W|8k;4h2?0eZ2oJc$>F7;9675jae zv~S3M?$?>U-d7VKN1a@LpCrP85Nyk6#2L{kGj&~MH?=7mK~$6I!4=xP5+$PM|7?^7 ztiu_=x;%^w+A}Ouvw>?17~I$turmemb9pn^e!&-mohIr+navwI10=>6h74>JrLCk| zr7{EXb9;m*qrnNZoMGrpU1Nf20WepP<`+68X{o7&p^t0|(_72kRcN41fu(OToyH3q@%fm2JXmz_i$GzUbV79jw6t)-4{;KnLFc? zNMPwFjNCQ+adlG$09W;$t}9hP)?u*~NiVRaw=$#1*8bg|G^Qa+1qro7II*-x!ma@v z@J0t5GotT$c=w4xSXqNchKfw0$;XTpoxSjMGX_c?+X8+gCG~PN{Oy{Qi|WjF@@$_GI71eia>42)o)4YZbv@p|+*jDu9F z7W3__E{#?;Tyl`0Z5fQ|9vwV^MLeh2ANG13Xr;(S^rXrq#B7~&fZ0xm0u>!!m$88% zV>oWu6zcvizVU^@FI^;|@rJ<9<5gU<)2e^6;D}`y;78YwB-gSS$Ea&v2`@dk0XfKo zJjrgBxovBx1sLuf-Y-`uTZSL6_RJsqEp4lJ>5S5)lB|NeGqvidm%=foHEX4^<&&oj z=@1+@hs_3ZB2~>nc)vhX;9021!&`>aeX3Iu|3KbTEHw^csU^WBfc>|I%w!i8O&~^B z1|*~n05uOW-nQ9&6vo9Vsuwx*p_j(7e}F&|J)D^v_Gbq^kC7)nsq2(MQSoiK?%_>SbgC7EXyhHp6X43etWr4%==IAdj=j)Us!=$Y)@0k;G*?NW-buUtMgI z=9&;PqsQptfT~1gKQIL3(Z8z-vuR%$TtpidQ!_@PZhLi46~=fbyaWL^J`9sKl<*r2 zv#``bDGF%^nU&~pTs5(t2sUcCk{IqNr8Z{LNJexdVsOjJAd6MOR4pFs!ybEocUfu^ zz(X7DMFx8#amvW1&wWn`eY2;KjZQC$^c%dzUVVQ!a;{tWmuX$KJ0X`q+n~QJP#t^g z`-lBASbuf4JsU@qaPm#wY6r_dzG|To(A?#7k*hkCKqXS?s5M=EED$DA3F*Y#v=Vmm z!oDpW?0rq6rtv9Sa6jY`n=bjoQuM+Z0A%3BF!6rKWv~)5c-Qb}T*$B3pgx&?R^bfM zhW==DVUn$X8MR8>rMTE|+NNmD_yb`+_m%@t;SY}e;Tav)4 zH|7hwDX15H89WMk_k?u`VZ2jYk38kX$w^_hu36MwwsY=7f@eIFEk{`%q> z`Jk+4eSy(_alQGYi|6aV59tl)@~^ks-G@7P@h)lo0i+TDBBWh7U%v=#1c}7&0tCNJ zZ*cf~_%8r>-Vzx4G&e&x&llIb!3Z{|P}@g08i_6(ZIKJ}HP2Ffl$^Jw_&=3I(ZFI%B|i!mh2Ahq0DE#C;|jtbRwo`J^lnq_A= zuJKxuulCnY=PX-?d@fJr490z_kyjN;y(yJoQVo98L#h(0x(ow-{9OR?Nj<*E`_0Cp z@@M%(LP-wZ?#kL#moY&5EQ8Q(RGm)_eo;w@^{)>no!q(9VUqT$>x?IHHt!E^WzE~P zRO(paWAm`!@;ZQEe5t9*EAF=8W40)n8BsEd2Jge0*;3+egnhs{4YN)zy7E92_3~_& ze#^TEph=YLTC{a-&W+R!dg|d_(H98Gd_;zXZ&GOiw{&Vp-vId9HSk4)}_RXD6`r-DOTpPgypKY{m_&g*iDwzyg?%xlm zuLjB^J(u$i`cy;#1+V)`a9rj>EGPZ8OeBM@MXc#V45PA?1Bo1&E-+7vz(%e)>!3`+ z5AiXeax8V#U2>&1Yo>X04lA*k0sr z)_Fx+;6GaT$S3WJt`6N6nMrr~eWAATUvk~=5hfq8xyer)`ioOf=-|JRkYV{i=n-{> zbJKF6crVdUFy@s{uBIJSb;$2tcQO6-zJtSN89z}zQ~>r-O2#+ezbI%8UtV{$TCY(1 zaV%Qzm|cu|MB`&Zp*0XGl7xLOWKDP0xEsxo?=2vgQCQDk4kMEP8l7DEh<*+QmYNK$cu!3sJ$+#2ni7a7l(!=~koq5^sz0L>`r&^TJUy6w9(2UChS>w9lJsmn>c zT7a<_Yj~+6Q8jIv<)+{+N`FO7yL^N}Qt>P5TMg6%tXTiLb?vo3GkZYcDex7GOHzJN zrb5-5STo#zB8+!?v;TYRG2n;74VyQBT6vDYQ*0q*`o_~rjP1?g9$q;&EjoqnEQ7fc zVh7IGdu^AUe23nbNvvdL6$rrf6-}+}A^ShrBXg(Y{M;XyZETlAC%IH=X$DHC@LA_= zcnR=mCQq{c0tNkMYdDtu%WPuA|A}auEnMS!@7--&jm3OfkPEpm`uU$~VeR}820UG4 zi@g;+QTTQKz7?+@4XXz!k_wYMA)hoooQ)rMc#dwAuVWoG;5wNJDgC#)Kc zJZ9QV8y8-^zmu1*&r)`Hu5Cs5n8^m_Ju(46kGDL2H;16PTR+zSV4*dcRKFOnI5OsE zzWC>fNxChIgdWmZ_rSTi$LRJ4bIiygCX}@3ojKv0mLUp(H^qWv$zN9cT7d-ui`Aq* z#JUdf13oXb$AfHF@(i9&78HO(YR;(bPGU*`E+M+N9;~N;qx-GkdnvkeYAb;G zVniwWIghqhR^Cs)AP^k!yG+##tfQmU5rJ_>(Q8(ny8)Q50X8jUl0R6}3XF$9rJj8j zS=sz6Hp+P&@7o&L+U~p~aMkU*+S3&8^G6P3k?u{IwLT@PlUv?4R25 zf2&bgXqyotN=XE(ejGI6T5j@GS(^YnE#B(x?(*=LRaskidBgoWDoGZMp2qiQR4C9^ zw5`b;78DDe{5rD>Ewb_#kRSYKL+^JOWj81`&dPC~62z0<#M|^ZL8}bWtm#2eT>iet zaINl+UTcs4f)1k<8Es#-D&|fhqo`&q#g>FjwU!3O9CU2Yg%tq~ zc4``^SVycCOf+OQ?1_D0&GF&GCoPqeEFm?~6|itl9pmH5MWbD?!wslrOsM5Zxavkt z95ty#&uJ_bmy*;@ik>(cm{EWNl4)c3*fWKA1M3YHQ%s{Z(PglI+cot}G~+hPE?H@< z`;#TAQT{ef>D#e2<6jY#id^BFX^fUN>`vwA#Ib1@O|HotHEe~I^V&8lSn+T@g!Oji zs>IAubEayUd!G4N*~YMJ?ek}?0%pXO2O{ODX1vjw2P0%rVjc6|BXxir6}B8x(t0|K ze5I^Dee2b-QFEL%71Q*P=;$gLqopN|@Y@6Dgx{PU^F0Wv<^MzifN1wJNwh8#gsYXh$Eca8QSdFTuyk3 zWkM^SLQGExWa0en7I>u~6FJ~~i9VLveQOOFG-)nyx;#?1sA)iIgZFiKLvFQQ#&3xb zQ<2mo0;Sy5*9K z!xaA+R7X=e<#r&(?{?)!h5=& zcZ;OyvIZYIFpnfX!iF?yF@gR->w?jt|KxqZu6u0bnij``vZ4BTk5Omvsdw}{Q1w{H z`{EBbZt-Ex3!I*jLE4ryQVi7n$~T7mb+tbd<22u2KrRON@r?pbP+V}e5e&#Fj#LE5 zJWmb@8VMd(L}UV<3NmbiJqEVPo;=H_;|27{Kal`|c_E=Xhx$U%0vl%DT$efT6zDpE z{ab=Qg391kf=aMHf{CfmR)Sl^IQ6)PB3bRU3-?%k)*V|_!-a4$pTYh?5Do`}c_D5? z1J$BcKs8S6aTxeo7(zWEmT?zK2Yo@PJ?H=qz`{d0EuWrJ4|;N-&{I3feNmKQwBLY| zRgznn7>$hU1R1@|iCdIq5zIaUybT}!pxZvKDRC7JGPWfQOoM(H&&QsxOh0bwg&?y) zfYmV5Sg27%wX)7*u#l?2qYAy^wtUbDG{C|T*!!C-Z>p}>A!Y%9g70#Y9XcI2*g7=&lN-?1;zvQw-@s-X?jQKJj8(lXSDXqIPa zjD>d2OE=fZQqcBLDum$pE@F6Mr6A2x|F!RK807k?TgL+7Qt4bY@WXVC##B>ahIPLl zV1v!IGEGhYrSR4xoqv@Kv~Cp`c-PL5KR=5PD=}mU2D$altho00sEVBN3Ok2M8<_;+H`eqY9S)C|Piw+o#Uh zgS+r%auArM8W30Zd;u|>B5^RZ!(B7;c36H^aI>qxGI)5My-)uUt&LF z@JiDbUS8j+EU1hALSqg6i`s4A@!cfk){PGBON%Al9!+GZ-vGK^*6-BewQv94Lth4f~(b;ul~pYU@WUlx?hGOuEbO zP=*+69Su@}8R`Triy6qfvFE?3hk>|w{VGdlDC@|Qck-8aL-SjQ4fIU?tymFT$0hHE zZa%!$yFE!cJ)*1Ou<g z-&9ySe=_=NSRf!i@&9AO`kq51@OV|XrqmZ zP|EKTR=s|;UrPD=m@7Qw+r2Lzvuuc4-Na1I+#v#?6gg# zM|OVhtxYQF?lU-oe9J;SEXD+UDloWyo@UD;%eAW>5A^+rlkO>o8~Pw3>V1j;-1ep~ zvd07%9E_MtnB3>U1Hf96E9;7*F!C-p-W>qtlg|l)v@Iu52Siu!l#J%>$ACGQ;7?xO z+(g^=FK)4>(oLP6aHot>pss&?=&7zT=1egspH5LV zFqzFSHf5FPF(rAvodxhhZbAYMXms$`qvu_xlvkgYmoy{Z?jh3iY8B)GJWj%eT}5wi z33#Ns0*Z?ilz!=DKuxJj>LSarx1gaUm;1__TfH!X+^T5-!L#iq&W>{}*d)F_8(qxCe9evgzKFAA2Tl7EDcJln+ zIebU=GG_WujgF;1J1r6eCKnelqY6DxVu-5s@M0EUzVl`?+;$;;QxU3i@j>e{ZmB$WNlmg0{|lAiCWm7D zo1|R|F=qX|c(M3H}@CoVKH>m=5-GY3+dMbHS{xS+pgfo zsGVK#vwYspH3-D2q z42=GKOG$g((W5GGp|pO3tp3#$nG*LOfAIB=h;^a%YO6{>n*<_6dh113*nImNaNS)1 zXG_U^`kFZ@^`7fR9Cv%e_gJALvxVn{=vDZ6+pUKg!(%{VRjn?}9MkP02XClxaa>+I zWzlB~Bqa=R2i+%wIHGA>qYNRg9%0eI;e&nJl#A!|d4vqFTfR4&K7e*Zc3o4y7 zb+x2MErRg^xbgq#>MNt-YL<3~f#B{kxVu|$9UKOCceeoofkA^L=nyQp2ZBSe;6Z|G z@ZjzQ4}{Bk&v)-V=j*lh>gwIywf62G-SyO}erh;PkYQ2Seqyp?QXSW@NvE-=VWDEFxGH(iSs9>GxrD+3lwgIZ5j+i z>-oNWgxUtw(#U2p7V4zJZrl&Ovi zS_4Ol%{#@p->Ig!*1XwW=aEOOi8uXQB5LPjRZyjKw@yKpa|B4(5P@)UG`1e=ltRjhC&EO9 z*6f5Q4!RD-y8NL12hG^MkPk4*8tHb7z)>(d4o*(P@VH)Z9+e350y~s*dTSIX$7!RE z%^|qeg=h~^y#H@ND+`+dGF*A%H@77Es+UoaW+gTC8yPiuOMbXx4sK=fn3#izHAPq9 zNoGY--d^iZn{*`A0`NpFrlWJ4vk6;0t*(ARineX4p;Fm)C@^)})CHy;&lQcBlT;*6 zJ6%wPU`d|!nH;@EeXukSIzo36BaZOS*_a$S6xn7^V9bg|i+^BUQpNT|j4jwZlbXza z7ftSyZ+Mi3P;w}y+Hw>P5-!OXbeRo=z}$X?VYcifE;rEQ7@&;avsHbyA40-Yn1? zhA?O=3^)alf1^8~V`6O{Hp#)u<^0q==9{5}2D}hO*kLfw9=^5-zk624HONFO{$byw zL-$C3RJ*zQK(?*i zs;e#OHcia;(y3Fa7Mz$H$*l*IwjCfxkG+0F58$&O-`^M)MaaWOy5Brq?uajTR#t z@R9@~e;FZ7vD3FM2aL3?X}M?)-(z#)JJc$f+*6sGlehW8pL>PIb?|~lu<=!x!|4pEOM8XZniql8>O47eh-=^tdPE>3;L_WyG&^p8w*F}ixTe+GbCc#qIrXjKx@Kdvj$bM@s*G~(#p3Ot&J+~{f}aW8i#1e@ z$s=}$Da(&O&qOL#y^go0VuO4s&3w+4xAW71aYHyAK!yU-cKlTgF;?NmRL&BKP|7~t z9gRW$8+$K%RTn(jj?Q-;&GspCwoFNva1G1&gz~wvANBO+;f@}}bACJiL6?WilL42n zEM|aT#=?h?>JKA!%a?wbE4UHAm2T@+#KJ&s9}Dk6s<-5U1oKQPf5wt>;>q8&Mt1ar z={xjZ+qXYljXJb1ZOW{9ew`_4?01b8ip9V;nWn4j@wg+bb2pVfIId7WVG#;E77B^5 zZ7TgN$PBr$W553i|EWmVmsSOGD*Zek%DBf%HN^(Zf0}seHWKG z_b-l4i?{7}^PBDc&BOg?Tp{gudz(%5usaQ-Z>@LU7t~8i=Fc*(yFP1co*Ik(aVKnC zuyDxt$EtKEt*^0tNbui})S1`xzq)&lJH1`u7w=45J#aD&e(Zi7qx${hiA+ox?Nj)d zyMYt_ron0)Krc@avwN7X^2=`5u@KX3PC6Q0HZBsN$GI_W`MBFeV7*7c9Arq91Z%_j zGTeR(l-j-NNbE=#@dIl>%L@LmN*q*WusI$P6nIOaI<$7BY3(;`X-d5PJmLpK18*jj zxXkoYc6HtfddY33Y+qk*p3SZevHOqoQz)Diau##^E$8@KdpELqL}ZGWQR=CHrDvA* zJVjJ9NyM&FfzEBxBcaj^iVS+Z7uyv6&e&Pt?sY*lrfY#earf?S@TmWm#DevGS91DV z%_aztMW^oM+i(e93pnb8K&k~F2K=O+e59XWH7upiV&i0>w}VqDUpSXQwFiQg=2uubVHwbpaj*V6lo5o9zH=Z(Er(6o#?Q$b zMewYN{oBSblF&smTaX+Yuvjny2?^FvSgZcZK@)Qf%}oZwhxqh0{TqRPr8y|&EM&0t z5vj|KD!0AtA_XDzZ+4DpY~#W>Qf$jlD8?eB6oadPJOZ`|7z zVF%*bgCV4Uw!JCLXLt&gA3M9d#&2`Wv=6R283!s}(t_9spGNz?vK3Yzc_b=Zl;?h? z=DI+;Q2xe<#+WXk#nwcJxO1)m7!U2_zGnP&VZZ64%tr`oi4F_tG)iH%N&>0~@Si zragL53kDh`rvd;j?uj1H>eo#d@4|Sa@#x2M)S*83j%HQZAAf$$5PWE#cqEFJ4$qQj zpBzJpozodY?^UorXE$~YsT8bkW&|_f62Cz%8$4k)2<4PbcxPd{#K(8w1bC|^;*2_~ ze91@MqeIvtbg`*xssBgQ@)=XsLip$eRyrkQTs6Xx?|=2n)E0S3v3<&!>}1#cAzQS7 z6SxX$AX(>UM@PDAphzre;>gNk%ht%7ifaGHoXQbqLW4xUf=y)Bib0b7X`ic_VD<6( zJ#%$Xfu%*-rg4;a6(0={8v^9!?B*sc&nyxdIy4pO1amDix zB$N8WiCr)msrR0(a$cel^a{h1lkrTrrhd`ikoiM2Rg2#tx>B1R1u>v{-|#5pZR73=7{-B(CkW-z&((ZyEHSQ} ziZN<4Vs2U0ts$Doi|qIk4R3G;i`Y9@>Q-QzmboyIKrtaEQc2DKnCD&XQg;?nXZX|e z93qX4u}8)CIMF3G>FbP^xkJQL9JXG-q3p@wuN+IV&7NOS9V&Git@p7se6Y^-)v>*E zAH{p^DPpw0b>uE(`uZ0+CUGB;OWcxtAwy`TL+E=l;wq&GH-sACURJgpSfGZTng*fR zHb|~L_?4KhUrv2GQ}(1Kf_Ppp1{56@;HvR0JcTX@(J*O}pe}~pYb7)QAzxqq1Pv_& znSv9lifD=~03HXgm1G3W|CB$J{&NU7xH{YNPn2JqjW^h>CpkWkEpS}Ot=+DU5m4;d z=RwJUULn9;u8|yOWhp&xy8qOhy+_{t!KI0sqx{u95zHvpWP+;*b7- zj!G05e;BEGRk8W>2#J7I4V@asdtI4wyQnfCE$_VR7c4yXl5R1p7eAwd0P|T8h3jpc zUeyL>4Qd?b>%1u}Y+8^qKI$BT&cV5v^$j{13hzq!5;S}wCI|`R1KlHLrV4i!;(K11 z-tr4F(mcRs)ybh5<@WS ze)Z&5pIhKNJMQ|?wn&jQJf<~!ZwP0tz!A2MKM>Q6%Kf8a&H*Y|ixR2KEUqbTR!8~D zy4fuE+(nTQZ7={bkr$^XHCAT$a*IY`Xck1F$M0d;KDHZeAtCyAp*{Pi%z~P05MIe` z*9x`ifJyUN@B?7+A}g>U;m{V_q#Hhk5!0qoe?N=gU*dwOJs4+~D6#|#R`(yl>YkWp zcXuAXR*H-mmMCk(RUQRG3btRWG~>7Gu%&4jvG=rOfmgI~lEYQ^>C06yf1%j_+#!j*xi63&-bcgu zC^+iw_Ay9bl6ST3H1`>?A!G#|;Z^r77w0ZKepLll?q}-iK(bG#wPbH*>qPj;@N3<6 zey=RyH-2oi}C$#Nur3T z+xf)RdbAqpcaAv$@T@MHl@lk{^p_C)VRXh1+TEug7mRHd!#QnqrT*KtsAfVMj6??j zbX5QVQUD>q$H@)q>g4Xi4gYk31K<5c0@K-gjUSR{k-TX|Jx)8&Hp0=znu*lQb)2-p zhw}C7*X-|TRI;PWKr(xm=RbZUVJof!UZnOK&exXAb+d;fW%`wNc6-g-&8yRL3)J#n zXHUppEAiS3E(2BN^KZJmq+HXl2Rz!({3I1R^u@_RH%k|%?b=Rza$7zNpfrt#3)reu z`11MFyS1!}dWcFcwSMTfqEih8CbDwV%>GR$#kD)Ai_eVOsT4AnuRP<#)KrEF`~lb2 zKJc{6>f5IUHNWkeiWh_M@4FwTT zsW^815_P}Kkw5?{4!h4-iLQ}bg~dB74-rF9=L<)Hdn>bj^pV={N|DZ0oUHt#SLi%9 z4aUKa%_ti@{dS%jAoM8DF<8tcs@=L|v3CZ8Jiey0Ci@R1Cb6=yT9OcXlN+tf{vd$V zN6u$)zL`*l^jF%Y_4Q$?i`&c6uiu{~^hVdV3#9V>c?^;ErtIKILM=-u>lrXL;m1Hn z-WjGf#^@!363wU)M?)kZ&Oeov(+jDlID%t$AjKi>ghamJ(I*5JI#`0?5+$BNSY{7p z3pu7rD*&DrhM*wg&zjr|HMmfCGqfD~16kuks{$z5Tov^PH9`R}q5PP4M``0tHc!Q8 zEvhWiQi=_2w#I2$)Wj(w>C~EtIkV*EToL?wo&*}}05Jo3ghn-|iI0RI44J}3T-(lx zXs*0L{o>1Wn;bm-ZW5{QK^_p*KTT`XpfiVr&f-o zSSHM>seb-ETU%Pl-pxuKOe%><3>E1=pu@v2a6zLhFtox&Gh3kcV&lD84-cZ6V&K0x zx1Ovbg@=d5nxPqZGT?5roO={hT6g!KxLlzsu`|+ssq#8PUbIZYr@IX z4PsTg9P-_&VK9ex*wPFpX{P0d6IX)y)}N+F>z?7XeG#ZXbJ)jhcW1dY>s1@lAbNxH@@jY=m+$2ae)?;4Z8rc6T|PSeeDSQmbU;>81ld;#?l|db$n(M*-Zuvw+(!~ zX}9uz+FpXp*=V0l89n<_wI}d!XYfwZ4A?3Rj7Rp45G;6yQ>93fe)4-@nZNziqL3oP zUc@R^J`oOs4}y4!_5v}(EwIMrb%0*zR=Qq=zFs^#MTvDO+kU*ryz95UDLyWmtq(H^6d!48H_BbCKH2ZhUxN=5;Yr-ZDhz z)j6ywm&~n4-`m6^j@Pu(hcT5&V6jF?Xv2X=+Q6`D=Xf2;^R>Y)aYzD|BgGTFM|Qip z+};vRtRx{Uti!l-2MW5l0*wvTmsuXK?|)NiPHwK1dMkd6bt)_1%+$Fjb2D3iHC8IB z8LZK?a1`Ccvx3pKsyO*A^CW=1ssEnl*Z>x8l~i^bB8>eGQq%BVD|mzE5G*d8HOF<$ zafspJR6-5jdEFpWEmwMn_V;oySp3mtIrmYbBLUpqAaN+JMCHodZ1pUuCh1lxAT5>K{19z^TL;7M z>8>Zwbeh9%i|}fA?i@*ZgidvMjy^a5Zr|YSrtDngB;RQE+(KseQJw}bocxTlo-d@X$Xlo_EH`A&7v;`qL zdfRi@m8E=7{_eS}o_v(aKNpOev40M?PG)%A86 z!52J}G&K|t2lSdf$}al~dLPK`t@_R+hJMT623QnU_{9_G^PlF9r^50AuN~&J}XY!+H zC=BN;-^Si~aAB=zQ3{#z9tn!&ORaWInunu4w*EwQAo#lbHdiSA^|`i~a985G*iFQ3 zV1;|tFWD{XB2F$&nBKNrMg?qVuV*%Eq4WJC$X|N7b^CHcORB037U3J#g)WKGsd)1X zAuu0Y+d9X-pPmsMe+h+{w=@qv#ZOY%sF964I`($-6Hfjp>H1#Xx-AY44*OfpVdi$Y z*g{+WybJ8IPh0D9`6Ie9PxB;JjPgKc_~iT8`T6_qmZ1gbG0J}M{vE8We4}S{98RIZw075H{tCY=?WFoT{YI=vwdy^3#XyM95hOpcF zJs%yn!dz~(YtT-r{o03Xd;F(xN$E3KUx-X1{r1-MrY8M3_4E;a?w3c4!7kXqT9138 zPbYv#fXbmX+*rIB@h?n`xutp$9Vvvn1 zRifJ&)8BxQ<1FRpAWl`?OWQm9e$%z=8<2+7dz6lk0hJPL39zj3{E^^w~_?6?5vTQd-dHu z%B_p%y?p8BMJuS>h(@K~3S>2iiYg?S#A(|b1b^RZJtJzVBD^3%!WXX?>dQk5u#~cv zln?^|df}1>Vxa%Z8S&Epukzx|d?`_4@VX=NmlP7D=>KLh`-jAA>c9+0Z2C`+${{aM z%KRVE(;Nk!d&mZKw*05LeMkcO4=Nh~^*;*#uVT3M%ODCk-w`qB|Hp&?{?{Ob=zlbY z9PtA6QeQ+>U{V&GHR1>^lM4?x)&hRcgAX4=fNl9N z(+J=^Cvw2ug8vb*;AJIPaOD$R(0_Yv|5F)75q$gvj5t~hfP0iv!H-UG;fkl^hzBMA zYLml*PlXW=OaCpGRp7waPuUP3%K-4aN^&^!85>Zb60UOw2HID{v(H#T|MEZXztF$! zQUL&D|FsI-weXEI1)y~koaI~&7&QlXJLjPMfA}5uf3JtiPXPR6kp}+roDF!l00&;M l{0{)*z90f#Ey5KqxIq7nQ2z6uSeD^Q7i`EkJO6t5e*mU+@PhyV