Убрано лишнее из CMSIS библиотеки
Добавлено подключение DSP в конфиг периферии
This commit is contained in:
parent
5a03fbb513
commit
d7dec9df35
@ -1,563 +0,0 @@
|
|||||||
/**************************************************************************//**
|
|
||||||
* @file cmsis_armcc.h
|
|
||||||
* @brief CMSIS compiler specific macros, functions, instructions
|
|
||||||
* @version V1.0.5
|
|
||||||
* @date 05. May 2021
|
|
||||||
******************************************************************************/
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2009-2021 Arm Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __CMSIS_ARMCC_H
|
|
||||||
#define __CMSIS_ARMCC_H
|
|
||||||
|
|
||||||
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 400677)
|
|
||||||
#error "Please use Arm Compiler Toolchain V4.0.677 or later!"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* CMSIS compiler control architecture macros */
|
|
||||||
#if (defined (__TARGET_ARCH_7_A ) && (__TARGET_ARCH_7_A == 1))
|
|
||||||
#define __ARM_ARCH_7A__ 1
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* CMSIS compiler specific defines */
|
|
||||||
#ifndef __ASM
|
|
||||||
#define __ASM __asm
|
|
||||||
#endif
|
|
||||||
#ifndef __INLINE
|
|
||||||
#define __INLINE __inline
|
|
||||||
#endif
|
|
||||||
#ifndef __FORCEINLINE
|
|
||||||
#define __FORCEINLINE __forceinline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_INLINE
|
|
||||||
#define __STATIC_INLINE static __inline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_FORCEINLINE
|
|
||||||
#define __STATIC_FORCEINLINE static __forceinline
|
|
||||||
#endif
|
|
||||||
#ifndef __NO_RETURN
|
|
||||||
#define __NO_RETURN __declspec(noreturn)
|
|
||||||
#endif
|
|
||||||
#ifndef CMSIS_DEPRECATED
|
|
||||||
#define CMSIS_DEPRECATED __attribute__((deprecated))
|
|
||||||
#endif
|
|
||||||
#ifndef __USED
|
|
||||||
#define __USED __attribute__((used))
|
|
||||||
#endif
|
|
||||||
#ifndef __WEAK
|
|
||||||
#define __WEAK __attribute__((weak))
|
|
||||||
#endif
|
|
||||||
#ifndef __PACKED
|
|
||||||
#define __PACKED __attribute__((packed))
|
|
||||||
#endif
|
|
||||||
#ifndef __PACKED_STRUCT
|
|
||||||
#define __PACKED_STRUCT __packed struct
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT16_WRITE
|
|
||||||
#define __UNALIGNED_UINT16_WRITE(addr, val) ((*((__packed uint16_t *)(addr))) = (val))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT16_READ
|
|
||||||
#define __UNALIGNED_UINT16_READ(addr) (*((const __packed uint16_t *)(addr)))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT32_WRITE
|
|
||||||
#define __UNALIGNED_UINT32_WRITE(addr, val) ((*((__packed uint32_t *)(addr))) = (val))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT32_READ
|
|
||||||
#define __UNALIGNED_UINT32_READ(addr) (*((const __packed uint32_t *)(addr)))
|
|
||||||
#endif
|
|
||||||
#ifndef __ALIGNED
|
|
||||||
#define __ALIGNED(x) __attribute__((aligned(x)))
|
|
||||||
#endif
|
|
||||||
#ifndef __PACKED
|
|
||||||
#define __PACKED __attribute__((packed))
|
|
||||||
#endif
|
|
||||||
#ifndef __COMPILER_BARRIER
|
|
||||||
#define __COMPILER_BARRIER() __memory_changed()
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ########################## Core Instruction Access ######################### */
|
|
||||||
/**
|
|
||||||
\brief No Operation
|
|
||||||
*/
|
|
||||||
#define __NOP __nop
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Wait For Interrupt
|
|
||||||
*/
|
|
||||||
#define __WFI __wfi
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Wait For Event
|
|
||||||
*/
|
|
||||||
#define __WFE __wfe
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Send Event
|
|
||||||
*/
|
|
||||||
#define __SEV __sev
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Instruction Synchronization Barrier
|
|
||||||
*/
|
|
||||||
#define __ISB() __isb(0xF)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Data Synchronization Barrier
|
|
||||||
*/
|
|
||||||
#define __DSB() __dsb(0xF)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Data Memory Barrier
|
|
||||||
*/
|
|
||||||
#define __DMB() __dmb(0xF)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse byte order (32 bit)
|
|
||||||
\details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
#define __REV __rev
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse byte order (16 bit)
|
|
||||||
\details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
#ifndef __NO_EMBEDDED_ASM
|
|
||||||
__attribute__((section(".rev16_text"))) __STATIC_INLINE __ASM uint32_t __REV16(uint32_t value)
|
|
||||||
{
|
|
||||||
rev16 r0, r0
|
|
||||||
bx lr
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse byte order (16 bit)
|
|
||||||
\details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
#ifndef __NO_EMBEDDED_ASM
|
|
||||||
__attribute__((section(".revsh_text"))) __STATIC_INLINE __ASM int16_t __REVSH(int16_t value)
|
|
||||||
{
|
|
||||||
revsh r0, r0
|
|
||||||
bx lr
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Rotate Right in unsigned value (32 bit)
|
|
||||||
\param [in] op1 Value to rotate
|
|
||||||
\param [in] op2 Number of Bits to rotate
|
|
||||||
\return Rotated value
|
|
||||||
*/
|
|
||||||
#define __ROR __ror
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Breakpoint
|
|
||||||
\param [in] value is ignored by the processor.
|
|
||||||
If required, a debugger can use it to store additional information about the breakpoint.
|
|
||||||
*/
|
|
||||||
#define __BKPT(value) __breakpoint(value)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse bit order of value
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
#define __RBIT __rbit
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Count leading zeros
|
|
||||||
\param [in] value Value to count the leading zeros
|
|
||||||
\return number of leading zeros in value
|
|
||||||
*/
|
|
||||||
#define __CLZ __clz
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief LDR Exclusive (8 bit)
|
|
||||||
\details Executes a exclusive LDR instruction for 8 bit value.
|
|
||||||
\param [in] ptr Pointer to data
|
|
||||||
\return value of type uint8_t at (*ptr)
|
|
||||||
*/
|
|
||||||
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
|
|
||||||
#define __LDREXB(ptr) ((uint8_t ) __ldrex(ptr))
|
|
||||||
#else
|
|
||||||
#define __LDREXB(ptr) _Pragma("push") _Pragma("diag_suppress 3731") ((uint8_t ) __ldrex(ptr)) _Pragma("pop")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief LDR Exclusive (16 bit)
|
|
||||||
\details Executes a exclusive LDR instruction for 16 bit values.
|
|
||||||
\param [in] ptr Pointer to data
|
|
||||||
\return value of type uint16_t at (*ptr)
|
|
||||||
*/
|
|
||||||
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
|
|
||||||
#define __LDREXH(ptr) ((uint16_t) __ldrex(ptr))
|
|
||||||
#else
|
|
||||||
#define __LDREXH(ptr) _Pragma("push") _Pragma("diag_suppress 3731") ((uint16_t) __ldrex(ptr)) _Pragma("pop")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief LDR Exclusive (32 bit)
|
|
||||||
\details Executes a exclusive LDR instruction for 32 bit values.
|
|
||||||
\param [in] ptr Pointer to data
|
|
||||||
\return value of type uint32_t at (*ptr)
|
|
||||||
*/
|
|
||||||
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
|
|
||||||
#define __LDREXW(ptr) ((uint32_t ) __ldrex(ptr))
|
|
||||||
#else
|
|
||||||
#define __LDREXW(ptr) _Pragma("push") _Pragma("diag_suppress 3731") ((uint32_t ) __ldrex(ptr)) _Pragma("pop")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief STR Exclusive (8 bit)
|
|
||||||
\details Executes a exclusive STR instruction for 8 bit values.
|
|
||||||
\param [in] value Value to store
|
|
||||||
\param [in] ptr Pointer to location
|
|
||||||
\return 0 Function succeeded
|
|
||||||
\return 1 Function failed
|
|
||||||
*/
|
|
||||||
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
|
|
||||||
#define __STREXB(value, ptr) __strex(value, ptr)
|
|
||||||
#else
|
|
||||||
#define __STREXB(value, ptr) _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr) _Pragma("pop")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief STR Exclusive (16 bit)
|
|
||||||
\details Executes a exclusive STR instruction for 16 bit values.
|
|
||||||
\param [in] value Value to store
|
|
||||||
\param [in] ptr Pointer to location
|
|
||||||
\return 0 Function succeeded
|
|
||||||
\return 1 Function failed
|
|
||||||
*/
|
|
||||||
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
|
|
||||||
#define __STREXH(value, ptr) __strex(value, ptr)
|
|
||||||
#else
|
|
||||||
#define __STREXH(value, ptr) _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr) _Pragma("pop")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief STR Exclusive (32 bit)
|
|
||||||
\details Executes a exclusive STR instruction for 32 bit values.
|
|
||||||
\param [in] value Value to store
|
|
||||||
\param [in] ptr Pointer to location
|
|
||||||
\return 0 Function succeeded
|
|
||||||
\return 1 Function failed
|
|
||||||
*/
|
|
||||||
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION < 5060020)
|
|
||||||
#define __STREXW(value, ptr) __strex(value, ptr)
|
|
||||||
#else
|
|
||||||
#define __STREXW(value, ptr) _Pragma("push") _Pragma("diag_suppress 3731") __strex(value, ptr) _Pragma("pop")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Remove the exclusive lock
|
|
||||||
\details Removes the exclusive lock which is created by LDREX.
|
|
||||||
*/
|
|
||||||
#define __CLREX __clrex
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Signed Saturate
|
|
||||||
\details Saturates a signed value.
|
|
||||||
\param [in] value Value to be saturated
|
|
||||||
\param [in] sat Bit position to saturate to (1..32)
|
|
||||||
\return Saturated value
|
|
||||||
*/
|
|
||||||
#define __SSAT __ssat
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Unsigned Saturate
|
|
||||||
\details Saturates an unsigned value.
|
|
||||||
\param [in] value Value to be saturated
|
|
||||||
\param [in] sat Bit position to saturate to (0..31)
|
|
||||||
\return Saturated value
|
|
||||||
*/
|
|
||||||
#define __USAT __usat
|
|
||||||
|
|
||||||
/* ########################### Core Function Access ########################### */
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Enable IRQ Interrupts
|
|
||||||
\details Enables IRQ interrupts by clearing the I-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
/* intrinsic void __enable_irq(); */
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Disable IRQ Interrupts
|
|
||||||
\details Disables IRQ interrupts by setting the I-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
/* intrinsic void __disable_irq(void); */
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Enable FIQ
|
|
||||||
\details Enables FIQ interrupts by clearing the F-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
#define __enable_fault_irq __enable_fiq
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Disable FIQ
|
|
||||||
\details Disables FIQ interrupts by setting the F-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
#define __disable_fault_irq __disable_fiq
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Get FPSCR (Floating Point Status/Control)
|
|
||||||
\return Floating Point Status/Control register value
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint32_t __get_FPSCR(void)
|
|
||||||
{
|
|
||||||
#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
|
|
||||||
(defined (__FPU_USED ) && (__FPU_USED == 1U)) )
|
|
||||||
register uint32_t __regfpscr __ASM("fpscr");
|
|
||||||
return(__regfpscr);
|
|
||||||
#else
|
|
||||||
return(0U);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Set FPSCR (Floating Point Status/Control)
|
|
||||||
\param [in] fpscr Floating Point Status/Control value to set
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void __set_FPSCR(uint32_t fpscr)
|
|
||||||
{
|
|
||||||
#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
|
|
||||||
(defined (__FPU_USED ) && (__FPU_USED == 1U)) )
|
|
||||||
register uint32_t __regfpscr __ASM("fpscr");
|
|
||||||
__regfpscr = (fpscr);
|
|
||||||
#else
|
|
||||||
(void)fpscr;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CPSR (Current Program Status Register)
|
|
||||||
\return CPSR Register value
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint32_t __get_CPSR(void)
|
|
||||||
{
|
|
||||||
register uint32_t __regCPSR __ASM("cpsr");
|
|
||||||
return(__regCPSR);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** \brief Set CPSR (Current Program Status Register)
|
|
||||||
\param [in] cpsr CPSR value to set
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void __set_CPSR(uint32_t cpsr)
|
|
||||||
{
|
|
||||||
register uint32_t __regCPSR __ASM("cpsr");
|
|
||||||
__regCPSR = cpsr;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get Mode
|
|
||||||
\return Processor Mode
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint32_t __get_mode(void)
|
|
||||||
{
|
|
||||||
return (__get_CPSR() & 0x1FU);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set Mode
|
|
||||||
\param [in] mode Mode value to set
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE __ASM void __set_mode(uint32_t mode)
|
|
||||||
{
|
|
||||||
MOV r1, lr
|
|
||||||
MSR CPSR_C, r0
|
|
||||||
BX r1
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get Stack Pointer
|
|
||||||
\return Stack Pointer
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE __ASM uint32_t __get_SP(void)
|
|
||||||
{
|
|
||||||
MOV r0, sp
|
|
||||||
BX lr
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set Stack Pointer
|
|
||||||
\param [in] stack Stack Pointer value to set
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE __ASM void __set_SP(uint32_t stack)
|
|
||||||
{
|
|
||||||
MOV sp, r0
|
|
||||||
BX lr
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/** \brief Get USR/SYS Stack Pointer
|
|
||||||
\return USR/SYSStack Pointer
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE __ASM uint32_t __get_SP_usr(void)
|
|
||||||
{
|
|
||||||
ARM
|
|
||||||
PRESERVE8
|
|
||||||
|
|
||||||
MRS R1, CPSR
|
|
||||||
CPS #0x1F ;no effect in USR mode
|
|
||||||
MOV R0, SP
|
|
||||||
MSR CPSR_c, R1 ;no effect in USR mode
|
|
||||||
ISB
|
|
||||||
BX LR
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set USR/SYS Stack Pointer
|
|
||||||
\param [in] topOfProcStack USR/SYS Stack Pointer value to set
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE __ASM void __set_SP_usr(uint32_t topOfProcStack)
|
|
||||||
{
|
|
||||||
ARM
|
|
||||||
PRESERVE8
|
|
||||||
|
|
||||||
MRS R1, CPSR
|
|
||||||
CPS #0x1F ;no effect in USR mode
|
|
||||||
MOV SP, R0
|
|
||||||
MSR CPSR_c, R1 ;no effect in USR mode
|
|
||||||
ISB
|
|
||||||
BX LR
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get FPEXC (Floating Point Exception Control Register)
|
|
||||||
\return Floating Point Exception Control Register value
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint32_t __get_FPEXC(void)
|
|
||||||
{
|
|
||||||
#if (__FPU_PRESENT == 1)
|
|
||||||
register uint32_t __regfpexc __ASM("fpexc");
|
|
||||||
return(__regfpexc);
|
|
||||||
#else
|
|
||||||
return(0);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set FPEXC (Floating Point Exception Control Register)
|
|
||||||
\param [in] fpexc Floating Point Exception Control value to set
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void __set_FPEXC(uint32_t fpexc)
|
|
||||||
{
|
|
||||||
#if (__FPU_PRESENT == 1)
|
|
||||||
register uint32_t __regfpexc __ASM("fpexc");
|
|
||||||
__regfpexc = (fpexc);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Include common core functions to access Coprocessor 15 registers
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define __get_CP(cp, op1, Rt, CRn, CRm, op2) do { register volatile uint32_t tmp __ASM("cp" # cp ":" # op1 ":c" # CRn ":c" # CRm ":" # op2); (Rt) = tmp; } while(0)
|
|
||||||
#define __set_CP(cp, op1, Rt, CRn, CRm, op2) do { register volatile uint32_t tmp __ASM("cp" # cp ":" # op1 ":c" # CRn ":c" # CRm ":" # op2); tmp = (Rt); } while(0)
|
|
||||||
#define __get_CP64(cp, op1, Rt, CRm) \
|
|
||||||
do { \
|
|
||||||
uint32_t ltmp, htmp; \
|
|
||||||
__ASM volatile("MRRC p" # cp ", " # op1 ", ltmp, htmp, c" # CRm); \
|
|
||||||
(Rt) = ((((uint64_t)htmp) << 32U) | ((uint64_t)ltmp)); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#define __set_CP64(cp, op1, Rt, CRm) \
|
|
||||||
do { \
|
|
||||||
const uint64_t tmp = (Rt); \
|
|
||||||
const uint32_t ltmp = (uint32_t)(tmp); \
|
|
||||||
const uint32_t htmp = (uint32_t)(tmp >> 32U); \
|
|
||||||
__ASM volatile("MCRR p" # cp ", " # op1 ", ltmp, htmp, c" # CRm); \
|
|
||||||
} while(0)
|
|
||||||
|
|
||||||
#include "cmsis_cp15.h"
|
|
||||||
|
|
||||||
/** \brief Enable Floating Point Unit
|
|
||||||
|
|
||||||
Critical section, called from undef handler, so systick is disabled
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE __ASM void __FPU_Enable(void)
|
|
||||||
{
|
|
||||||
ARM
|
|
||||||
|
|
||||||
//Permit access to VFP/NEON, registers by modifying CPACR
|
|
||||||
MRC p15,0,R1,c1,c0,2
|
|
||||||
ORR R1,R1,#0x00F00000
|
|
||||||
MCR p15,0,R1,c1,c0,2
|
|
||||||
|
|
||||||
//Ensure that subsequent instructions occur in the context of VFP/NEON access permitted
|
|
||||||
ISB
|
|
||||||
|
|
||||||
//Enable VFP/NEON
|
|
||||||
VMRS R1,FPEXC
|
|
||||||
ORR R1,R1,#0x40000000
|
|
||||||
VMSR FPEXC,R1
|
|
||||||
|
|
||||||
//Initialise VFP/NEON registers to 0
|
|
||||||
MOV R2,#0
|
|
||||||
|
|
||||||
//Initialise D16 registers to 0
|
|
||||||
VMOV D0, R2,R2
|
|
||||||
VMOV D1, R2,R2
|
|
||||||
VMOV D2, R2,R2
|
|
||||||
VMOV D3, R2,R2
|
|
||||||
VMOV D4, R2,R2
|
|
||||||
VMOV D5, R2,R2
|
|
||||||
VMOV D6, R2,R2
|
|
||||||
VMOV D7, R2,R2
|
|
||||||
VMOV D8, R2,R2
|
|
||||||
VMOV D9, R2,R2
|
|
||||||
VMOV D10,R2,R2
|
|
||||||
VMOV D11,R2,R2
|
|
||||||
VMOV D12,R2,R2
|
|
||||||
VMOV D13,R2,R2
|
|
||||||
VMOV D14,R2,R2
|
|
||||||
VMOV D15,R2,R2
|
|
||||||
|
|
||||||
IF {TARGET_FEATURE_EXTENSION_REGISTER_COUNT} == 32
|
|
||||||
//Initialise D32 registers to 0
|
|
||||||
VMOV D16,R2,R2
|
|
||||||
VMOV D17,R2,R2
|
|
||||||
VMOV D18,R2,R2
|
|
||||||
VMOV D19,R2,R2
|
|
||||||
VMOV D20,R2,R2
|
|
||||||
VMOV D21,R2,R2
|
|
||||||
VMOV D22,R2,R2
|
|
||||||
VMOV D23,R2,R2
|
|
||||||
VMOV D24,R2,R2
|
|
||||||
VMOV D25,R2,R2
|
|
||||||
VMOV D26,R2,R2
|
|
||||||
VMOV D27,R2,R2
|
|
||||||
VMOV D28,R2,R2
|
|
||||||
VMOV D29,R2,R2
|
|
||||||
VMOV D30,R2,R2
|
|
||||||
VMOV D31,R2,R2
|
|
||||||
ENDIF
|
|
||||||
|
|
||||||
//Initialise FPSCR to a known state
|
|
||||||
VMRS R1,FPSCR
|
|
||||||
LDR R2,=0x00086060 //Mask off all bits that do not have to be preserved. Non-preserved bits can/should be zero.
|
|
||||||
AND R1,R1,R2
|
|
||||||
VMSR FPSCR,R1
|
|
||||||
|
|
||||||
BX LR
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* __CMSIS_ARMCC_H */
|
|
||||||
@ -1,614 +0,0 @@
|
|||||||
/**************************************************************************//**
|
|
||||||
* @file cmsis_armclang.h
|
|
||||||
* @brief CMSIS compiler specific macros, functions, instructions
|
|
||||||
* @version V1.2.1
|
|
||||||
* @date 05. May 2021
|
|
||||||
******************************************************************************/
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2009-2021 Arm Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __CMSIS_ARMCLANG_H
|
|
||||||
#define __CMSIS_ARMCLANG_H
|
|
||||||
|
|
||||||
#pragma clang system_header /* treat file as system include file */
|
|
||||||
|
|
||||||
/* CMSIS compiler specific defines */
|
|
||||||
#ifndef __ASM
|
|
||||||
#define __ASM __asm
|
|
||||||
#endif
|
|
||||||
#ifndef __INLINE
|
|
||||||
#define __INLINE __inline
|
|
||||||
#endif
|
|
||||||
#ifndef __FORCEINLINE
|
|
||||||
#define __FORCEINLINE __attribute__((always_inline))
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_INLINE
|
|
||||||
#define __STATIC_INLINE static __inline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_FORCEINLINE
|
|
||||||
#define __STATIC_FORCEINLINE __attribute__((always_inline)) static __inline
|
|
||||||
#endif
|
|
||||||
#ifndef __NO_RETURN
|
|
||||||
#define __NO_RETURN __attribute__((__noreturn__))
|
|
||||||
#endif
|
|
||||||
#ifndef CMSIS_DEPRECATED
|
|
||||||
#define CMSIS_DEPRECATED __attribute__((deprecated))
|
|
||||||
#endif
|
|
||||||
#ifndef __USED
|
|
||||||
#define __USED __attribute__((used))
|
|
||||||
#endif
|
|
||||||
#ifndef __WEAK
|
|
||||||
#define __WEAK __attribute__((weak))
|
|
||||||
#endif
|
|
||||||
#ifndef __PACKED
|
|
||||||
#define __PACKED __attribute__((packed, aligned(1)))
|
|
||||||
#endif
|
|
||||||
#ifndef __PACKED_STRUCT
|
|
||||||
#define __PACKED_STRUCT struct __attribute__((packed, aligned(1)))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT16_WRITE
|
|
||||||
#pragma clang diagnostic push
|
|
||||||
#pragma clang diagnostic ignored "-Wpacked"
|
|
||||||
/*lint -esym(9058, T_UINT16_WRITE)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT16_WRITE */
|
|
||||||
__PACKED_STRUCT T_UINT16_WRITE { uint16_t v; };
|
|
||||||
#pragma clang diagnostic pop
|
|
||||||
#define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT16_READ
|
|
||||||
#pragma clang diagnostic push
|
|
||||||
#pragma clang diagnostic ignored "-Wpacked"
|
|
||||||
/*lint -esym(9058, T_UINT16_READ)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT16_READ */
|
|
||||||
__PACKED_STRUCT T_UINT16_READ { uint16_t v; };
|
|
||||||
#pragma clang diagnostic pop
|
|
||||||
#define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v)
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT32_WRITE
|
|
||||||
#pragma clang diagnostic push
|
|
||||||
#pragma clang diagnostic ignored "-Wpacked"
|
|
||||||
/*lint -esym(9058, T_UINT32_WRITE)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT32_WRITE */
|
|
||||||
__PACKED_STRUCT T_UINT32_WRITE { uint32_t v; };
|
|
||||||
#pragma clang diagnostic pop
|
|
||||||
#define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT32_READ
|
|
||||||
#pragma clang diagnostic push
|
|
||||||
#pragma clang diagnostic ignored "-Wpacked"
|
|
||||||
__PACKED_STRUCT T_UINT32_READ { uint32_t v; };
|
|
||||||
#pragma clang diagnostic pop
|
|
||||||
#define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v)
|
|
||||||
#endif
|
|
||||||
#ifndef __ALIGNED
|
|
||||||
#define __ALIGNED(x) __attribute__((aligned(x)))
|
|
||||||
#endif
|
|
||||||
#ifndef __PACKED
|
|
||||||
#define __PACKED __attribute__((packed))
|
|
||||||
#endif
|
|
||||||
#ifndef __COMPILER_BARRIER
|
|
||||||
#define __COMPILER_BARRIER() __ASM volatile("":::"memory")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* ########################## Core Instruction Access ######################### */
|
|
||||||
/**
|
|
||||||
\brief No Operation
|
|
||||||
*/
|
|
||||||
#define __NOP __builtin_arm_nop
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Wait For Interrupt
|
|
||||||
*/
|
|
||||||
#define __WFI __builtin_arm_wfi
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Wait For Event
|
|
||||||
*/
|
|
||||||
#define __WFE __builtin_arm_wfe
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Send Event
|
|
||||||
*/
|
|
||||||
#define __SEV __builtin_arm_sev
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Instruction Synchronization Barrier
|
|
||||||
*/
|
|
||||||
#define __ISB() __builtin_arm_isb(0xF)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Data Synchronization Barrier
|
|
||||||
*/
|
|
||||||
#define __DSB() __builtin_arm_dsb(0xF)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Data Memory Barrier
|
|
||||||
*/
|
|
||||||
#define __DMB() __builtin_arm_dmb(0xF)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse byte order (32 bit)
|
|
||||||
\details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
#define __REV(value) __builtin_bswap32(value)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse byte order (16 bit)
|
|
||||||
\details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
#define __REV16(value) __ROR(__REV(value), 16)
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse byte order (16 bit)
|
|
||||||
\details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
/* The whole expansion is parenthesized so the cast stays attached to the call
   when the macro is used next to another operator (e.g. sizeof __REVSH(x)). */
#define __REVSH(value)   ((int16_t)__builtin_bswap16(value))
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Rotate Right in unsigned value (32 bit)
|
|
||||||
\details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
|
|
||||||
\param [in] op1 Value to rotate
|
|
||||||
\param [in] op2 Number of Bits to rotate
|
|
||||||
\return Rotated value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
|
|
||||||
{
|
|
||||||
op2 %= 32U;
|
|
||||||
if (op2 == 0U)
|
|
||||||
{
|
|
||||||
return op1;
|
|
||||||
}
|
|
||||||
return (op1 >> op2) | (op1 << (32U - op2));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Breakpoint
|
|
||||||
\param [in] value is ignored by the processor.
|
|
||||||
If required, a debugger can use it to store additional information about the breakpoint.
|
|
||||||
*/
|
|
||||||
#define __BKPT(value) __ASM volatile ("bkpt "#value)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse bit order of value
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
#define __RBIT __builtin_arm_rbit
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Count leading zeros
|
|
||||||
\param [in] value Value to count the leading zeros
|
|
||||||
\return number of leading zeros in value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value)
|
|
||||||
{
|
|
||||||
/* Even though __builtin_clz produces a CLZ instruction on ARM, formally
|
|
||||||
__builtin_clz(0) is undefined behaviour, so handle this case specially.
|
|
||||||
This guarantees ARM-compatible results if happening to compile on a non-ARM
|
|
||||||
target, and ensures the compiler doesn't decide to activate any
|
|
||||||
optimisations using the logic "value was passed to __builtin_clz, so it
|
|
||||||
is non-zero".
|
|
||||||
ARM Compiler 6.10 and possibly earlier will optimise this test away, leaving a
|
|
||||||
single CLZ instruction.
|
|
||||||
*/
|
|
||||||
if (value == 0U)
|
|
||||||
{
|
|
||||||
return 32U;
|
|
||||||
}
|
|
||||||
return __builtin_clz(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief LDR Exclusive (8 bit)
|
|
||||||
\details Executes a exclusive LDR instruction for 8 bit value.
|
|
||||||
\param [in] ptr Pointer to data
|
|
||||||
\return value of type uint8_t at (*ptr)
|
|
||||||
*/
|
|
||||||
#define __LDREXB (uint8_t)__builtin_arm_ldrex
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief LDR Exclusive (16 bit)
|
|
||||||
\details Executes a exclusive LDR instruction for 16 bit values.
|
|
||||||
\param [in] ptr Pointer to data
|
|
||||||
\return value of type uint16_t at (*ptr)
|
|
||||||
*/
|
|
||||||
#define __LDREXH (uint16_t)__builtin_arm_ldrex
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief LDR Exclusive (32 bit)
|
|
||||||
\details Executes a exclusive LDR instruction for 32 bit values.
|
|
||||||
\param [in] ptr Pointer to data
|
|
||||||
\return value of type uint32_t at (*ptr)
|
|
||||||
*/
|
|
||||||
#define __LDREXW (uint32_t)__builtin_arm_ldrex
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief STR Exclusive (8 bit)
|
|
||||||
\details Executes a exclusive STR instruction for 8 bit values.
|
|
||||||
\param [in] value Value to store
|
|
||||||
\param [in] ptr Pointer to location
|
|
||||||
\return 0 Function succeeded
|
|
||||||
\return 1 Function failed
|
|
||||||
*/
|
|
||||||
#define __STREXB (uint32_t)__builtin_arm_strex
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief STR Exclusive (16 bit)
|
|
||||||
\details Executes a exclusive STR instruction for 16 bit values.
|
|
||||||
\param [in] value Value to store
|
|
||||||
\param [in] ptr Pointer to location
|
|
||||||
\return 0 Function succeeded
|
|
||||||
\return 1 Function failed
|
|
||||||
*/
|
|
||||||
#define __STREXH (uint32_t)__builtin_arm_strex
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief STR Exclusive (32 bit)
|
|
||||||
\details Executes a exclusive STR instruction for 32 bit values.
|
|
||||||
\param [in] value Value to store
|
|
||||||
\param [in] ptr Pointer to location
|
|
||||||
\return 0 Function succeeded
|
|
||||||
\return 1 Function failed
|
|
||||||
*/
|
|
||||||
#define __STREXW (uint32_t)__builtin_arm_strex
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Remove the exclusive lock
|
|
||||||
\details Removes the exclusive lock which is created by LDREX.
|
|
||||||
*/
|
|
||||||
#define __CLREX __builtin_arm_clrex
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Signed Saturate
|
|
||||||
\details Saturates a signed value.
|
|
||||||
\param [in] value Value to be saturated
|
|
||||||
\param [in] sat Bit position to saturate to (1..32)
|
|
||||||
\return Saturated value
|
|
||||||
*/
|
|
||||||
#define __SSAT __builtin_arm_ssat
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Unsigned Saturate
|
|
||||||
\details Saturates an unsigned value.
|
|
||||||
\param [in] value Value to be saturated
|
|
||||||
\param [in] sat Bit position to saturate to (0..31)
|
|
||||||
\return Saturated value
|
|
||||||
*/
|
|
||||||
#define __USAT __builtin_arm_usat
|
|
||||||
|
|
||||||
/* ################### Compiler specific Intrinsics ########################### */
|
|
||||||
/** \defgroup CMSIS_SIMD_intrinsics CMSIS SIMD Intrinsics
|
|
||||||
Access to dedicated SIMD instructions
|
|
||||||
@{
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if (defined (__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
|
|
||||||
|
|
||||||
#define __SADD8 __builtin_arm_sadd8
|
|
||||||
#define __SADD16 __builtin_arm_sadd16
|
|
||||||
#define __QADD8 __builtin_arm_qadd8
|
|
||||||
#define __QSUB8 __builtin_arm_qsub8
|
|
||||||
#define __QADD16 __builtin_arm_qadd16
|
|
||||||
#define __SHADD16 __builtin_arm_shadd16
|
|
||||||
#define __QSUB16 __builtin_arm_qsub16
|
|
||||||
#define __SHSUB16 __builtin_arm_shsub16
|
|
||||||
#define __QASX __builtin_arm_qasx
|
|
||||||
#define __SHASX __builtin_arm_shasx
|
|
||||||
#define __QSAX __builtin_arm_qsax
|
|
||||||
#define __SHSAX __builtin_arm_shsax
|
|
||||||
#define __SXTB16 __builtin_arm_sxtb16
|
|
||||||
#define __SMUAD __builtin_arm_smuad
|
|
||||||
#define __SMUADX __builtin_arm_smuadx
|
|
||||||
#define __SMLAD __builtin_arm_smlad
|
|
||||||
#define __SMLADX __builtin_arm_smladx
|
|
||||||
#define __SMLALD __builtin_arm_smlald
|
|
||||||
#define __SMLALDX __builtin_arm_smlaldx
|
|
||||||
#define __SMUSD __builtin_arm_smusd
|
|
||||||
#define __SMUSDX __builtin_arm_smusdx
|
|
||||||
#define __SMLSDX __builtin_arm_smlsdx
|
|
||||||
#define __USAT16 __builtin_arm_usat16
|
|
||||||
#define __SSUB8 __builtin_arm_ssub8
|
|
||||||
#define __SXTB16 __builtin_arm_sxtb16
|
|
||||||
#define __SXTAB16 __builtin_arm_sxtab16
|
|
||||||
|
|
||||||
|
|
||||||
/**
  \brief   Signed saturating add (QADD)
  \details Executes the QADD instruction on the two operands.
  \param [in]  op1  First operand
  \param [in]  op2  Second operand
  \return           Value produced by the QADD instruction
 */
__STATIC_FORCEINLINE int32_t __QADD(int32_t op1, int32_t op2)
{
  int32_t sum;

  __ASM volatile (
    "qadd %0, %1, %2"
    : "=r" (sum)
    : "r" (op1), "r" (op2)
  );

  return sum;
}
|
|
||||||
|
|
||||||
/**
  \brief   Signed saturating subtract (QSUB)
  \details Executes the QSUB instruction on the two operands.
  \param [in]  op1  First operand (minuend)
  \param [in]  op2  Second operand (subtrahend)
  \return           Value produced by the QSUB instruction
 */
__STATIC_FORCEINLINE int32_t __QSUB(int32_t op1, int32_t op2)
{
  int32_t difference;

  __ASM volatile (
    "qsub %0, %1, %2"
    : "=r" (difference)
    : "r" (op1), "r" (op2)
  );

  return difference;
}
|
|
||||||
|
|
||||||
#define __PKHBT(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0x0000FFFFUL) | \
|
|
||||||
((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL) )
|
|
||||||
|
|
||||||
#define __PKHTB(ARG1,ARG2,ARG3) ( ((((uint32_t)(ARG1)) ) & 0xFFFF0000UL) | \
|
|
||||||
((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL) )
|
|
||||||
|
|
||||||
/**
  \brief   Signed most-significant-word multiply accumulate (SMMLA)
  \details Executes the SMMLA instruction with op1 and op2 as the
           multiplication operands and op3 as the accumulator.
  \param [in]  op1  First multiplication operand
  \param [in]  op2  Second multiplication operand
  \param [in]  op3  Accumulate value
  \return           Value produced by the SMMLA instruction
 */
__STATIC_FORCEINLINE int32_t __SMMLA(int32_t op1, int32_t op2, int32_t op3)
{
  int32_t acc;

  __ASM volatile (
    "smmla %0, %1, %2, %3"
    : "=r" (acc)
    : "r" (op1), "r" (op2), "r" (op3)
  );

  return acc;
}
|
|
||||||
|
|
||||||
#endif /* (__ARM_FEATURE_DSP == 1) */
|
|
||||||
|
|
||||||
/* ########################### Core Function Access ########################### */
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Enable IRQ Interrupts
|
|
||||||
\details Enables IRQ interrupts by clearing the I-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __enable_irq(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("cpsie i" : : : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Disable IRQ Interrupts
|
|
||||||
\details Disables IRQ interrupts by setting the I-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __disable_irq(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("cpsid i" : : : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Enable FIQ
|
|
||||||
\details Enables FIQ interrupts by clearing the F-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __enable_fault_irq(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("cpsie f" : : : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Disable FIQ
|
|
||||||
\details Disables FIQ interrupts by setting the F-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __disable_fault_irq(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("cpsid f" : : : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Get FPSCR
|
|
||||||
\details Returns the current value of the Floating Point Status/Control register.
|
|
||||||
\return Floating Point Status/Control register value
|
|
||||||
*/
|
|
||||||
#define __get_FPSCR __builtin_arm_get_fpscr
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Set FPSCR
|
|
||||||
\details Assigns the given value to the Floating Point Status/Control register.
|
|
||||||
\param [in] fpscr Floating Point Status/Control value to set
|
|
||||||
*/
|
|
||||||
#define __set_FPSCR __builtin_arm_set_fpscr
|
|
||||||
|
|
||||||
/** \brief Get CPSR Register
|
|
||||||
\return CPSR Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CPSR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__ASM volatile("MRS %0, cpsr" : "=r" (result) );
|
|
||||||
return(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set CPSR Register
|
|
||||||
\param [in] cpsr CPSR value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_CPSR(uint32_t cpsr)
|
|
||||||
{
|
|
||||||
__ASM volatile ("MSR cpsr, %0" : : "r" (cpsr) : "cc", "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get Mode
|
|
||||||
\return Processor Mode
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_mode(void)
|
|
||||||
{
|
|
||||||
return (__get_CPSR() & 0x1FU);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set Mode
|
|
||||||
\param [in] mode Mode value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_mode(uint32_t mode)
|
|
||||||
{
|
|
||||||
__ASM volatile("MSR cpsr_c, %0" : : "r" (mode) : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get Stack Pointer
|
|
||||||
\return Stack Pointer value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_SP(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__ASM volatile("MOV %0, sp" : "=r" (result) : : "memory");
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set Stack Pointer
|
|
||||||
\param [in] stack Stack Pointer value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_SP(uint32_t stack)
|
|
||||||
{
|
|
||||||
__ASM volatile("MOV sp, %0" : : "r" (stack) : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get USR/SYS Stack Pointer
|
|
||||||
\return USR/SYS Stack Pointer value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_SP_usr(void)
|
|
||||||
{
|
|
||||||
uint32_t cpsr;
|
|
||||||
uint32_t result;
|
|
||||||
__ASM volatile(
|
|
||||||
"MRS %0, cpsr \n"
|
|
||||||
"CPS #0x1F \n" // no effect in USR mode
|
|
||||||
"MOV %1, sp \n"
|
|
||||||
"MSR cpsr_c, %0 \n" // no effect in USR mode
|
|
||||||
"ISB" : "=r"(cpsr), "=r"(result) : : "memory"
|
|
||||||
);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set USR/SYS Stack Pointer
|
|
||||||
\param [in] topOfProcStack USR/SYS Stack Pointer value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_SP_usr(uint32_t topOfProcStack)
|
|
||||||
{
|
|
||||||
uint32_t cpsr;
|
|
||||||
__ASM volatile(
|
|
||||||
"MRS %0, cpsr \n"
|
|
||||||
"CPS #0x1F \n" // no effect in USR mode
|
|
||||||
"MOV sp, %1 \n"
|
|
||||||
"MSR cpsr_c, %0 \n" // no effect in USR mode
|
|
||||||
"ISB" : "=r"(cpsr) : "r" (topOfProcStack) : "memory"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get FPEXC
|
|
||||||
\return Floating Point Exception Control register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_FPEXC(void)
|
|
||||||
{
|
|
||||||
#if (__FPU_PRESENT == 1)
|
|
||||||
uint32_t result;
|
|
||||||
__ASM volatile("VMRS %0, fpexc" : "=r" (result) : : "memory");
|
|
||||||
return(result);
|
|
||||||
#else
|
|
||||||
return(0);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set FPEXC
|
|
||||||
\param [in] fpexc Floating Point Exception Control value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_FPEXC(uint32_t fpexc)
|
|
||||||
{
|
|
||||||
#if (__FPU_PRESENT == 1)
|
|
||||||
__ASM volatile ("VMSR fpexc, %0" : : "r" (fpexc) : "memory");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Include common core functions to access Coprocessor 15 registers
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define __get_CP(cp, op1, Rt, CRn, CRm, op2) __ASM volatile("MRC p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : "=r" (Rt) : : "memory" )
|
|
||||||
#define __set_CP(cp, op1, Rt, CRn, CRm, op2) __ASM volatile("MCR p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : : "r" (Rt) : "memory" )
|
|
||||||
#define __get_CP64(cp, op1, Rt, CRm) __ASM volatile("MRRC p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : "=r" (Rt) : : "memory" )
|
|
||||||
#define __set_CP64(cp, op1, Rt, CRm) __ASM volatile("MCRR p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : : "r" (Rt) : "memory" )
|
|
||||||
|
|
||||||
#include "cmsis_cp15.h"
|
|
||||||
|
|
||||||
/** \brief Enable Floating Point Unit
|
|
||||||
|
|
||||||
Critical section, called from undef handler, so systick is disabled
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void __FPU_Enable(void)
|
|
||||||
{
|
|
||||||
__ASM volatile(
|
|
||||||
//Permit access to VFP/NEON, registers by modifying CPACR
|
|
||||||
" MRC p15,0,R1,c1,c0,2 \n"
|
|
||||||
" ORR R1,R1,#0x00F00000 \n"
|
|
||||||
" MCR p15,0,R1,c1,c0,2 \n"
|
|
||||||
|
|
||||||
//Ensure that subsequent instructions occur in the context of VFP/NEON access permitted
|
|
||||||
" ISB \n"
|
|
||||||
|
|
||||||
//Enable VFP/NEON
|
|
||||||
" VMRS R1,FPEXC \n"
|
|
||||||
" ORR R1,R1,#0x40000000 \n"
|
|
||||||
" VMSR FPEXC,R1 \n"
|
|
||||||
|
|
||||||
//Initialise VFP/NEON registers to 0
|
|
||||||
" MOV R2,#0 \n"
|
|
||||||
|
|
||||||
//Initialise D16 registers to 0
|
|
||||||
" VMOV D0, R2,R2 \n"
|
|
||||||
" VMOV D1, R2,R2 \n"
|
|
||||||
" VMOV D2, R2,R2 \n"
|
|
||||||
" VMOV D3, R2,R2 \n"
|
|
||||||
" VMOV D4, R2,R2 \n"
|
|
||||||
" VMOV D5, R2,R2 \n"
|
|
||||||
" VMOV D6, R2,R2 \n"
|
|
||||||
" VMOV D7, R2,R2 \n"
|
|
||||||
" VMOV D8, R2,R2 \n"
|
|
||||||
" VMOV D9, R2,R2 \n"
|
|
||||||
" VMOV D10,R2,R2 \n"
|
|
||||||
" VMOV D11,R2,R2 \n"
|
|
||||||
" VMOV D12,R2,R2 \n"
|
|
||||||
" VMOV D13,R2,R2 \n"
|
|
||||||
" VMOV D14,R2,R2 \n"
|
|
||||||
" VMOV D15,R2,R2 \n"
|
|
||||||
|
|
||||||
#if (defined(__ARM_NEON) && (__ARM_NEON == 1))
|
|
||||||
//Initialise D32 registers to 0
|
|
||||||
" VMOV D16,R2,R2 \n"
|
|
||||||
" VMOV D17,R2,R2 \n"
|
|
||||||
" VMOV D18,R2,R2 \n"
|
|
||||||
" VMOV D19,R2,R2 \n"
|
|
||||||
" VMOV D20,R2,R2 \n"
|
|
||||||
" VMOV D21,R2,R2 \n"
|
|
||||||
" VMOV D22,R2,R2 \n"
|
|
||||||
" VMOV D23,R2,R2 \n"
|
|
||||||
" VMOV D24,R2,R2 \n"
|
|
||||||
" VMOV D25,R2,R2 \n"
|
|
||||||
" VMOV D26,R2,R2 \n"
|
|
||||||
" VMOV D27,R2,R2 \n"
|
|
||||||
" VMOV D28,R2,R2 \n"
|
|
||||||
" VMOV D29,R2,R2 \n"
|
|
||||||
" VMOV D30,R2,R2 \n"
|
|
||||||
" VMOV D31,R2,R2 \n"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//Initialise FPSCR to a known state
|
|
||||||
" VMRS R1,FPSCR \n"
|
|
||||||
" LDR R2,=0x00086060 \n" //Mask off all bits that do not have to be preserved. Non-preserved bits can/should be zero.
|
|
||||||
" AND R1,R1,R2 \n"
|
|
||||||
" VMSR FPSCR,R1 "
|
|
||||||
: : : "cc", "r1", "r2"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* __CMSIS_ARMCLANG_H */
|
|
||||||
@ -1,213 +0,0 @@
|
|||||||
/**************************************************************************//**
|
|
||||||
* @file cmsis_compiler.h
|
|
||||||
* @brief CMSIS compiler specific macros, functions, instructions
|
|
||||||
* @version V1.0.2
|
|
||||||
* @date 10. January 2018
|
|
||||||
******************************************************************************/
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2009-2018 Arm Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __CMSIS_COMPILER_H
|
|
||||||
#define __CMSIS_COMPILER_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Arm Compiler 4/5
|
|
||||||
*/
|
|
||||||
#if defined ( __CC_ARM )
|
|
||||||
#include "cmsis_armcc.h"
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Arm Compiler 6 (armclang)
|
|
||||||
*/
|
|
||||||
#elif defined (__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
|
|
||||||
#include "cmsis_armclang.h"
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* GNU Compiler
|
|
||||||
*/
|
|
||||||
#elif defined ( __GNUC__ )
|
|
||||||
#include "cmsis_gcc.h"
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* IAR Compiler
|
|
||||||
*/
|
|
||||||
#elif defined ( __ICCARM__ )
|
|
||||||
#include "cmsis_iccarm.h"
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* TI Arm Compiler
|
|
||||||
*/
|
|
||||||
#elif defined ( __TI_ARM__ )
|
|
||||||
#include <cmsis_ccs.h>
|
|
||||||
|
|
||||||
#ifndef __ASM
|
|
||||||
#define __ASM __asm
|
|
||||||
#endif
|
|
||||||
#ifndef __INLINE
|
|
||||||
#define __INLINE inline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_INLINE
|
|
||||||
#define __STATIC_INLINE static inline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_INLINE
|
|
||||||
#define __STATIC_INLINE static inline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_FORCEINLINE
|
|
||||||
#define __STATIC_FORCEINLINE __STATIC_INLINE
|
|
||||||
#endif
|
|
||||||
#ifndef __NO_RETURN
|
|
||||||
#define __NO_RETURN __attribute__((noreturn))
|
|
||||||
#endif
|
|
||||||
#ifndef CMSIS_DEPRECATED
|
|
||||||
#define CMSIS_DEPRECATED __attribute__((deprecated))
|
|
||||||
#endif
|
|
||||||
#ifndef __USED
|
|
||||||
#define __USED __attribute__((used))
|
|
||||||
#endif
|
|
||||||
#ifndef __WEAK
|
|
||||||
#define __WEAK __attribute__((weak))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT32
|
|
||||||
struct __attribute__((packed)) T_UINT32 { uint32_t v; };
|
|
||||||
#define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v)
|
|
||||||
#endif
|
|
||||||
#ifndef __ALIGNED
|
|
||||||
#define __ALIGNED(x) __attribute__((aligned(x)))
|
|
||||||
#endif
|
|
||||||
#ifndef __PACKED
|
|
||||||
#define __PACKED __attribute__((packed))
|
|
||||||
#endif
|
|
||||||
#ifndef __COMPILER_BARRIER
|
|
||||||
#warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored.
|
|
||||||
#define __COMPILER_BARRIER() (void)0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* TASKING Compiler
|
|
||||||
*/
|
|
||||||
#elif defined ( __TASKING__ )
|
|
||||||
/*
|
|
||||||
* The CMSIS functions have been implemented as intrinsics in the compiler.
|
|
||||||
* Please use "carm -?i" to get an up to date list of all intrinsics,
|
|
||||||
* Including the CMSIS ones.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __ASM
|
|
||||||
#define __ASM __asm
|
|
||||||
#endif
|
|
||||||
#ifndef __INLINE
|
|
||||||
#define __INLINE inline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_INLINE
|
|
||||||
#define __STATIC_INLINE static inline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_FORCEINLINE
|
|
||||||
#define __STATIC_FORCEINLINE __STATIC_INLINE
|
|
||||||
#endif
|
|
||||||
#ifndef __NO_RETURN
|
|
||||||
#define __NO_RETURN __attribute__((noreturn))
|
|
||||||
#endif
|
|
||||||
#ifndef CMSIS_DEPRECATED
|
|
||||||
#define CMSIS_DEPRECATED __attribute__((deprecated))
|
|
||||||
#endif
|
|
||||||
#ifndef __USED
|
|
||||||
#define __USED __attribute__((used))
|
|
||||||
#endif
|
|
||||||
#ifndef __WEAK
|
|
||||||
#define __WEAK __attribute__((weak))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT32
|
|
||||||
struct __packed__ T_UINT32 { uint32_t v; };
|
|
||||||
#define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v)
|
|
||||||
#endif
|
|
||||||
#ifndef __ALIGNED
|
|
||||||
#define __ALIGNED(x) __align(x)
|
|
||||||
#endif
|
|
||||||
#ifndef __PACKED
|
|
||||||
#define __PACKED __packed__
|
|
||||||
#endif
|
|
||||||
#ifndef __COMPILER_BARRIER
|
|
||||||
#warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored.
|
|
||||||
#define __COMPILER_BARRIER() (void)0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/*
|
|
||||||
* COSMIC Compiler
|
|
||||||
*/
|
|
||||||
#elif defined ( __CSMC__ )
|
|
||||||
#include <cmsis_csm.h>
|
|
||||||
|
|
||||||
#ifndef __ASM
|
|
||||||
#define __ASM _asm
|
|
||||||
#endif
|
|
||||||
#ifndef __INLINE
|
|
||||||
#define __INLINE inline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_INLINE
|
|
||||||
#define __STATIC_INLINE static inline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_FORCEINLINE
|
|
||||||
#define __STATIC_FORCEINLINE __STATIC_INLINE
|
|
||||||
#endif
|
|
||||||
#ifndef __NO_RETURN
|
|
||||||
// NO RETURN is automatically detected hence no warning here
|
|
||||||
#define __NO_RETURN
|
|
||||||
#endif
|
|
||||||
#ifndef __USED
|
|
||||||
#warning No compiler specific solution for __USED. __USED is ignored.
|
|
||||||
#define __USED
|
|
||||||
#endif
|
|
||||||
#ifndef CMSIS_DEPRECATED
|
|
||||||
#warning No compiler specific solution for CMSIS_DEPRECATED. CMSIS_DEPRECATED is ignored.
|
|
||||||
#define CMSIS_DEPRECATED
|
|
||||||
#endif
|
|
||||||
#ifndef __WEAK
|
|
||||||
#define __WEAK __weak
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT32
|
|
||||||
@packed struct T_UINT32 { uint32_t v; };
|
|
||||||
#define __UNALIGNED_UINT32(x) (((struct T_UINT32 *)(x))->v)
|
|
||||||
#endif
|
|
||||||
#ifndef __ALIGNED
|
|
||||||
#warning No compiler specific solution for __ALIGNED. __ALIGNED is ignored.
|
|
||||||
#define __ALIGNED(x)
|
|
||||||
#endif
|
|
||||||
#ifndef __PACKED
|
|
||||||
#define __PACKED @packed
|
|
||||||
#endif
|
|
||||||
#ifndef __COMPILER_BARRIER
|
|
||||||
#warning No compiler specific solution for __COMPILER_BARRIER. __COMPILER_BARRIER is ignored.
|
|
||||||
#define __COMPILER_BARRIER() (void)0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error Unknown compiler.
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* __CMSIS_COMPILER_H */
|
|
||||||
|
|
||||||
@ -1,514 +0,0 @@
|
|||||||
/**************************************************************************//**
|
|
||||||
* @file cmsis_cp15.h
|
|
||||||
* @brief CMSIS compiler specific macros, functions, instructions
|
|
||||||
* @version V1.0.1
|
|
||||||
* @date 07. Sep 2017
|
|
||||||
******************************************************************************/
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2009-2017 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if defined ( __ICCARM__ )
|
|
||||||
#pragma system_include /* treat file as system include file for MISRA check */
|
|
||||||
#elif defined (__clang__)
|
|
||||||
#pragma clang system_header /* treat file as system include file */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __CMSIS_CP15_H
|
|
||||||
#define __CMSIS_CP15_H
|
|
||||||
|
|
||||||
/** \brief Get ACTLR
|
|
||||||
\return Auxiliary Control register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_ACTLR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 1, 0, 1);
|
|
||||||
return(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set ACTLR
|
|
||||||
\param [in] actlr Auxiliary Control value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_ACTLR(uint32_t actlr)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, actlr, 1, 0, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CPACR
|
|
||||||
\return Coprocessor Access Control register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CPACR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 1, 0, 2);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set CPACR
|
|
||||||
\param [in] cpacr Coprocessor Access Control value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_CPACR(uint32_t cpacr)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, cpacr, 1, 0, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get DFSR
|
|
||||||
\return Data Fault Status Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_DFSR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 5, 0, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set DFSR
|
|
||||||
\param [in] dfsr Data Fault Status value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_DFSR(uint32_t dfsr)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, dfsr, 5, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get IFSR
|
|
||||||
\return Instruction Fault Status Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_IFSR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 5, 0, 1);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set IFSR
|
|
||||||
\param [in] ifsr Instruction Fault Status value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_IFSR(uint32_t ifsr)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, ifsr, 5, 0, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get ISR
|
|
||||||
\return Interrupt Status Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_ISR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 12, 1, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CBAR
|
|
||||||
\return Configuration Base Address register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CBAR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 4, result, 15, 0, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get TTBR0
|
|
||||||
|
|
||||||
This function returns the value of the Translation Table Base Register 0.
|
|
||||||
|
|
||||||
\return Translation Table Base Register 0 value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_TTBR0(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 2, 0, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set TTBR0
|
|
||||||
|
|
||||||
This function assigns the given value to the Translation Table Base Register 0.
|
|
||||||
|
|
||||||
\param [in] ttbr0 Translation Table Base Register 0 value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_TTBR0(uint32_t ttbr0)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, ttbr0, 2, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get DACR
|
|
||||||
|
|
||||||
This function returns the value of the Domain Access Control Register.
|
|
||||||
|
|
||||||
\return Domain Access Control Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_DACR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 3, 0, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set DACR
|
|
||||||
|
|
||||||
This function assigns the given value to the Domain Access Control Register.
|
|
||||||
|
|
||||||
\param [in] dacr Domain Access Control Register value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_DACR(uint32_t dacr)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, dacr, 3, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set SCTLR
|
|
||||||
|
|
||||||
This function assigns the given value to the System Control Register.
|
|
||||||
|
|
||||||
\param [in] sctlr System Control Register value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_SCTLR(uint32_t sctlr)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, sctlr, 1, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get SCTLR
|
|
||||||
\return System Control Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_SCTLR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 1, 0, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set ACTRL
|
|
||||||
\param [in] actrl Auxiliary Control Register value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_ACTRL(uint32_t actrl)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, actrl, 1, 0, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get ACTRL
|
|
||||||
\return Auxiliary Control Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_ACTRL(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 1, 0, 1);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get MPIDR
|
|
||||||
|
|
||||||
This function returns the value of the Multiprocessor Affinity Register.
|
|
||||||
|
|
||||||
\return Multiprocessor Affinity Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_MPIDR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 0, 0, 5);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get VBAR
|
|
||||||
|
|
||||||
This function returns the value of the Vector Base Address Register.
|
|
||||||
|
|
||||||
\return Vector Base Address Register
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_VBAR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 12, 0, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set VBAR
|
|
||||||
|
|
||||||
This function assigns the given value to the Vector Base Address Register.
|
|
||||||
|
|
||||||
\param [in] vbar Vector Base Address Register value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_VBAR(uint32_t vbar)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, vbar, 12, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get MVBAR
|
|
||||||
|
|
||||||
This function returns the value of the Monitor Vector Base Address Register.
|
|
||||||
|
|
||||||
\return Monitor Vector Base Address Register
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_MVBAR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 12, 0, 1);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set MVBAR
|
|
||||||
|
|
||||||
This function assigns the given value to the Monitor Vector Base Address Register.
|
|
||||||
|
|
||||||
\param [in] mvbar Monitor Vector Base Address Register value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_MVBAR(uint32_t mvbar)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, mvbar, 12, 0, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#if (defined(__CORTEX_A) && (__CORTEX_A == 7U) && \
|
|
||||||
defined(__TIM_PRESENT) && (__TIM_PRESENT == 1U)) || \
|
|
||||||
defined(DOXYGEN)
|
|
||||||
|
|
||||||
/** \brief Set CNTFRQ
|
|
||||||
|
|
||||||
This function assigns the given value to PL1 Physical Timer Counter Frequency Register (CNTFRQ).
|
|
||||||
|
|
||||||
\param [in] value CNTFRQ Register value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_CNTFRQ(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, value, 14, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CNTFRQ
|
|
||||||
|
|
||||||
This function returns the value of the PL1 Physical Timer Counter Frequency Register (CNTFRQ).
|
|
||||||
|
|
||||||
\return CNTFRQ Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CNTFRQ(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 14, 0 , 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set CNTP_TVAL
|
|
||||||
|
|
||||||
This function assigns the given value to PL1 Physical Timer Value Register (CNTP_TVAL).
|
|
||||||
|
|
||||||
\param [in] value CNTP_TVAL Register value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_CNTP_TVAL(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, value, 14, 2, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CNTP_TVAL
|
|
||||||
|
|
||||||
This function returns the value of the PL1 Physical Timer Value Register (CNTP_TVAL).
|
|
||||||
|
|
||||||
\return CNTP_TVAL Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CNTP_TVAL(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 14, 2, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CNTPCT
|
|
||||||
|
|
||||||
This function returns the value of the 64-bit PL1 Physical Count Register (CNTPCT).
|
|
||||||
|
|
||||||
\return CNTPCT Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint64_t __get_CNTPCT(void)
|
|
||||||
{
|
|
||||||
uint64_t result;
|
|
||||||
__get_CP64(15, 0, result, 14);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set CNTP_CVAL
|
|
||||||
|
|
||||||
This function assigns the given value to the 64-bit PL1 Physical Timer CompareValue Register (CNTP_CVAL).
|
|
||||||
|
|
||||||
\param [in] value CNTP_CVAL Register value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_CNTP_CVAL(uint64_t value)
|
|
||||||
{
|
|
||||||
__set_CP64(15, 2, value, 14);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CNTP_CVAL
|
|
||||||
|
|
||||||
This function returns the value of the 64-bit PL1 Physical Timer CompareValue Register (CNTP_CVAL).
|
|
||||||
|
|
||||||
\return CNTP_CVAL Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint64_t __get_CNTP_CVAL(void)
|
|
||||||
{
|
|
||||||
uint64_t result;
|
|
||||||
__get_CP64(15, 2, result, 14);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set CNTP_CTL
|
|
||||||
|
|
||||||
This function assigns the given value to PL1 Physical Timer Control Register (CNTP_CTL).
|
|
||||||
|
|
||||||
\param [in] value CNTP_CTL Register value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_CNTP_CTL(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, value, 14, 2, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CNTP_CTL register
|
|
||||||
\return CNTP_CTL Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CNTP_CTL(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__get_CP(15, 0, result, 14, 2, 1);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/** \brief Set TLBIALL
|
|
||||||
|
|
||||||
TLB Invalidate All
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_TLBIALL(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, value, 8, 7, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set BPIALL.
|
|
||||||
|
|
||||||
Branch Predictor Invalidate All
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_BPIALL(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, value, 7, 5, 6);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set ICIALLU
|
|
||||||
|
|
||||||
Instruction Cache Invalidate All
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_ICIALLU(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, value, 7, 5, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set DCCMVAC
|
|
||||||
|
|
||||||
Data cache clean
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_DCCMVAC(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, value, 7, 10, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set DCIMVAC
|
|
||||||
|
|
||||||
Data cache invalidate
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_DCIMVAC(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, value, 7, 6, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set DCCIMVAC
|
|
||||||
|
|
||||||
Data cache clean and invalidate
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_DCCIMVAC(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CP(15, 0, value, 7, 14, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set CSSELR
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_CSSELR(uint32_t value)
|
|
||||||
{
|
|
||||||
// __ASM volatile("MCR p15, 2, %0, c0, c0, 0" : : "r"(value) : "memory");
|
|
||||||
__set_CP(15, 2, value, 0, 0, 0);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CSSELR
|
|
||||||
\return CSSELR Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CSSELR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
// __ASM volatile("MRC p15, 2, %0, c0, c0, 0" : "=r"(result) : : "memory");
|
|
||||||
__get_CP(15, 2, result, 0, 0, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set CCSIDR
|
|
||||||
\deprecated CCSIDR itself is read-only. Use __set_CSSELR to select cache level instead.
|
|
||||||
*/
|
|
||||||
CMSIS_DEPRECATED
|
|
||||||
__STATIC_FORCEINLINE void __set_CCSIDR(uint32_t value)
|
|
||||||
{
|
|
||||||
__set_CSSELR(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CCSIDR
|
|
||||||
\return CCSIDR Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CCSIDR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
// __ASM volatile("MRC p15, 1, %0, c0, c0, 0" : "=r"(result) : : "memory");
|
|
||||||
__get_CP(15, 1, result, 0, 0, 0);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CLIDR
|
|
||||||
\return CLIDR Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CLIDR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
// __ASM volatile("MRC p15, 1, %0, c0, c0, 1" : "=r"(result) : : "memory");
|
|
||||||
__get_CP(15, 1, result, 0, 0, 1);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set DCISW
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_DCISW(uint32_t value)
|
|
||||||
{
|
|
||||||
// __ASM volatile("MCR p15, 0, %0, c7, c6, 2" : : "r"(value) : "memory")
|
|
||||||
__set_CP(15, 0, value, 7, 6, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set DCCSW
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_DCCSW(uint32_t value)
|
|
||||||
{
|
|
||||||
// __ASM volatile("MCR p15, 0, %0, c7, c10, 2" : : "r"(value) : "memory")
|
|
||||||
__set_CP(15, 0, value, 7, 10, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set DCCISW
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_DCCISW(uint32_t value)
|
|
||||||
{
|
|
||||||
// __ASM volatile("MCR p15, 0, %0, c7, c14, 2" : : "r"(value) : "memory")
|
|
||||||
__set_CP(15, 0, value, 7, 14, 2);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@ -1,917 +0,0 @@
|
|||||||
/**************************************************************************//**
|
|
||||||
* @file cmsis_gcc.h
|
|
||||||
* @brief CMSIS compiler specific macros, functions, instructions
|
|
||||||
* @version V1.3.2
|
|
||||||
* @date 24. March 2022
|
|
||||||
******************************************************************************/
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2009-2022 Arm Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef __CMSIS_GCC_H
|
|
||||||
#define __CMSIS_GCC_H
|
|
||||||
|
|
||||||
/* ignore some GCC warnings */
|
|
||||||
#pragma GCC diagnostic push
|
|
||||||
#pragma GCC diagnostic ignored "-Wsign-conversion"
|
|
||||||
#pragma GCC diagnostic ignored "-Wconversion"
|
|
||||||
#pragma GCC diagnostic ignored "-Wunused-parameter"
|
|
||||||
|
|
||||||
/* Compilers that do not provide __has_builtin: every query answers "no". */
#if !defined(__has_builtin)
  #define __has_builtin(x) (0)
#endif
|
|
||||||
|
|
||||||
/* CMSIS compiler specific defines.
   Each macro is only defined when the including code has not already
   provided its own definition. */
#if !defined(__ASM)
  #define __ASM                                  __asm
#endif
#if !defined(__INLINE)
  #define __INLINE                               inline
#endif
#if !defined(__FORCEINLINE)
  #define __FORCEINLINE                          __attribute__((always_inline))
#endif
#if !defined(__STATIC_INLINE)
  #define __STATIC_INLINE                        static inline
#endif
#if !defined(__STATIC_FORCEINLINE)
  #define __STATIC_FORCEINLINE                   __attribute__((always_inline)) static inline
#endif
#if !defined(__NO_RETURN)
  #define __NO_RETURN                            __attribute__((__noreturn__))
#endif
#if !defined(CMSIS_DEPRECATED)
  #define CMSIS_DEPRECATED                       __attribute__((deprecated))
#endif
#if !defined(__USED)
  #define __USED                                 __attribute__((used))
#endif
#if !defined(__WEAK)
  #define __WEAK                                 __attribute__((weak))
#endif
#if !defined(__PACKED)
  #define __PACKED                               __attribute__((packed, aligned(1)))
#endif
#if !defined(__PACKED_STRUCT)
  #define __PACKED_STRUCT                        struct __attribute__((packed, aligned(1)))
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT16_WRITE
|
|
||||||
#pragma GCC diagnostic push
|
|
||||||
#pragma GCC diagnostic ignored "-Wpacked"
|
|
||||||
/*lint -esym(9058, T_UINT16_WRITE)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT16_WRITE */
|
|
||||||
__PACKED_STRUCT T_UINT16_WRITE { uint16_t v; };
|
|
||||||
#pragma GCC diagnostic pop
|
|
||||||
#define __UNALIGNED_UINT16_WRITE(addr, val) (void)((((struct T_UINT16_WRITE *)(void *)(addr))->v) = (val))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT16_READ
|
|
||||||
#pragma GCC diagnostic push
|
|
||||||
#pragma GCC diagnostic ignored "-Wpacked"
|
|
||||||
/*lint -esym(9058, T_UINT16_READ)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT16_READ */
|
|
||||||
__PACKED_STRUCT T_UINT16_READ { uint16_t v; };
|
|
||||||
#pragma GCC diagnostic pop
|
|
||||||
#define __UNALIGNED_UINT16_READ(addr) (((const struct T_UINT16_READ *)(const void *)(addr))->v)
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT32_WRITE
|
|
||||||
#pragma GCC diagnostic push
|
|
||||||
#pragma GCC diagnostic ignored "-Wpacked"
|
|
||||||
/*lint -esym(9058, T_UINT32_WRITE)*/ /* disable MISRA 2012 Rule 2.4 for T_UINT32_WRITE */
|
|
||||||
__PACKED_STRUCT T_UINT32_WRITE { uint32_t v; };
|
|
||||||
#pragma GCC diagnostic pop
|
|
||||||
#define __UNALIGNED_UINT32_WRITE(addr, val) (void)((((struct T_UINT32_WRITE *)(void *)(addr))->v) = (val))
|
|
||||||
#endif
|
|
||||||
#ifndef __UNALIGNED_UINT32_READ
|
|
||||||
#pragma GCC diagnostic push
|
|
||||||
#pragma GCC diagnostic ignored "-Wpacked"
|
|
||||||
__PACKED_STRUCT T_UINT32_READ { uint32_t v; };
|
|
||||||
#pragma GCC diagnostic pop
|
|
||||||
#define __UNALIGNED_UINT32_READ(addr) (((const struct T_UINT32_READ *)(const void *)(addr))->v)
|
|
||||||
#endif
|
|
||||||
/* Alignment attribute and a compiler-only memory barrier (an empty asm
   statement with a "memory" clobber). */
#if !defined(__ALIGNED)
  #define __ALIGNED(x)                           __attribute__((aligned(x)))
#endif
#if !defined(__COMPILER_BARRIER)
  #define __COMPILER_BARRIER()                   __ASM volatile("":::"memory")
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `qsub16` instruction: returns Rd of `qsub16 Rd, op1, op2`. */
__STATIC_FORCEINLINE uint32_t __QSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM volatile ("qsub16 %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `qsub8` instruction: returns Rd of `qsub8 Rd, op1, op2`. */
__STATIC_FORCEINLINE uint32_t __QSUB8(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM volatile ("qsub8 %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `qadd16` instruction: returns Rd of `qadd16 Rd, op1, op2`. */
__STATIC_FORCEINLINE uint32_t __QADD16(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM volatile ("qadd16 %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `qadd8` instruction: returns Rd of `qadd8 Rd, op1, op2`. */
__STATIC_FORCEINLINE uint32_t __QADD8(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM volatile ("qadd8 %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `qadd` instruction on signed 32-bit operands: returns Rd of `qadd Rd, op1, op2`. */
__STATIC_FORCEINLINE  int32_t __QADD( int32_t op1,  int32_t op2)
{
  int32_t rd;

  __ASM volatile ("qadd %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `qsax` instruction: returns Rd of `qsax Rd, op1, op2`.
            The asm statement is intentionally not volatile. */
__STATIC_FORCEINLINE uint32_t __QSAX(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM ("qsax %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `shsax` instruction: returns Rd of `shsax Rd, op1, op2`.
            The asm statement is intentionally not volatile. */
__STATIC_FORCEINLINE uint32_t __SHSAX(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM ("shsax %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smlald` instruction with a 64-bit accumulator.

    The accumulator is split into two 32-bit words through a union; both
    words are tied in/out operands of the instruction. Which word is passed
    as the first register depends on __ARMEB__ (big-endian builds swap them).

    \param [in]  op1  first source operand
    \param [in]  op2  second source operand
    \param [in]  acc  64-bit accumulator input
    \return           64-bit accumulator after the instruction
 */
__STATIC_FORCEINLINE uint64_t __SMLALD (uint32_t op1, uint32_t op2, uint64_t acc)
{
  union acc_words_u {
    uint32_t w32[2];
    uint64_t w64;
  } sum;

  sum.w64 = acc;

#ifdef __ARMEB__   /* Big endian */
  __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (sum.w32[1]), "=r" (sum.w32[0]): "r" (op1), "r" (op2) , "0" (sum.w32[1]), "1" (sum.w32[0]) );
#else              /* Little endian */
  __ASM volatile ("smlald %0, %1, %2, %3" : "=r" (sum.w32[0]), "=r" (sum.w32[1]): "r" (op1), "r" (op2) , "0" (sum.w32[0]), "1" (sum.w32[1]) );
#endif

  return sum.w64;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `qsub` instruction on signed 32-bit operands: returns Rd of `qsub Rd, op1, op2`. */
__STATIC_FORCEINLINE  int32_t __QSUB( int32_t op1,  int32_t op2)
{
  int32_t rd;

  __ASM volatile ("qsub %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `sxtb16` instruction: returns Rd of `sxtb16 Rd, op1`.
            The asm statement is intentionally not volatile. */
__STATIC_FORCEINLINE uint32_t __SXTB16(uint32_t op1)
{
  uint32_t rd;

  __ASM ("sxtb16 %0, %1" : "=r" (rd) : "r" (op1));
  return rd;
}
|
|
||||||
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smuad` instruction: returns Rd of `smuad Rd, op1, op2`. */
__STATIC_FORCEINLINE uint32_t __SMUAD  (uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM volatile ("smuad %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* Pack halfwords, bottom/top: keeps the low 16 bits of ARG1 and combines them
   with bits [31:16] of (ARG2 << ARG3). Both halves are one macro definition;
   the backslash must be immediately followed by the continuation line. */
#define __PKHBT(ARG1,ARG2,ARG3)          ( ((((uint32_t)(ARG1))          ) & 0x0000FFFFUL) |  \
                                           ((((uint32_t)(ARG2)) << (ARG3)) & 0xFFFF0000UL)  )
|
|
||||||
|
|
||||||
/* Pack halfwords, top/bottom: keeps the high 16 bits of ARG1 and combines them
   with bits [15:0] of (ARG2 >> ARG3). Both halves are one macro definition;
   the backslash must be immediately followed by the continuation line. */
#define __PKHTB(ARG1,ARG2,ARG3)          ( ((((uint32_t)(ARG1))          ) & 0xFFFF0000UL) |  \
                                           ((((uint32_t)(ARG2)) >> (ARG3)) & 0x0000FFFFUL)  )
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smlad` instruction: returns Rd of `smlad Rd, op1, op2, op3`. */
__STATIC_FORCEINLINE uint32_t __SMLAD (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t rd;

  __ASM volatile ("smlad %0, %1, %2, %3" : "=r" (rd) : "r" (op1), "r" (op2), "r" (op3) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smuadx` instruction: returns Rd of `smuadx Rd, op1, op2`. */
__STATIC_FORCEINLINE uint32_t __SMUADX (uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM volatile ("smuadx %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smladx` instruction: returns Rd of `smladx Rd, op1, op2, op3`. */
__STATIC_FORCEINLINE uint32_t __SMLADX (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t rd;

  __ASM volatile ("smladx %0, %1, %2, %3" : "=r" (rd) : "r" (op1), "r" (op2), "r" (op3) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smlaldx` instruction with a 64-bit accumulator.

    The accumulator is split into two 32-bit words through a union; both
    words are tied in/out operands of the instruction. Which word is passed
    as the first register depends on __ARMEB__ (big-endian builds swap them).

    \param [in]  op1  first source operand
    \param [in]  op2  second source operand
    \param [in]  acc  64-bit accumulator input
    \return           64-bit accumulator after the instruction
 */
__STATIC_FORCEINLINE uint64_t __SMLALDX (uint32_t op1, uint32_t op2, uint64_t acc)
{
  union acc_words_u {
    uint32_t w32[2];
    uint64_t w64;
  } sum;

  sum.w64 = acc;

#ifdef __ARMEB__   /* Big endian */
  __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (sum.w32[1]), "=r" (sum.w32[0]): "r" (op1), "r" (op2) , "0" (sum.w32[1]), "1" (sum.w32[0]) );
#else              /* Little endian */
  __ASM volatile ("smlaldx %0, %1, %2, %3" : "=r" (sum.w32[0]), "=r" (sum.w32[1]): "r" (op1), "r" (op2) , "0" (sum.w32[0]), "1" (sum.w32[1]) );
#endif

  return sum.w64;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smmla` instruction on signed operands: returns Rd of `smmla Rd, op1, op2, op3`. */
__STATIC_FORCEINLINE int32_t __SMMLA (int32_t op1, int32_t op2, int32_t op3)
{
  int32_t rd;

  __ASM volatile ("smmla %0, %1, %2, %3" : "=r" (rd): "r" (op1), "r" (op2), "r" (op3) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smusd` instruction: returns Rd of `smusd Rd, op1, op2`. */
__STATIC_FORCEINLINE uint32_t __SMUSD  (uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM volatile ("smusd %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smusdx` instruction: returns Rd of `smusdx Rd, op1, op2`. */
__STATIC_FORCEINLINE uint32_t __SMUSDX (uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM volatile ("smusdx %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `qasx` instruction: returns Rd of `qasx Rd, op1, op2`.
            The asm statement is intentionally not volatile. */
__STATIC_FORCEINLINE uint32_t __QASX(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM ("qasx %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `shadd16` instruction: returns Rd of `shadd16 Rd, op1, op2`.
            The asm statement is intentionally not volatile. */
__STATIC_FORCEINLINE uint32_t __SHADD16(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM ("shadd16 %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `shsub16` instruction: returns Rd of `shsub16 Rd, op1, op2`.
            The asm statement is intentionally not volatile. */
__STATIC_FORCEINLINE uint32_t __SHSUB16(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM ("shsub16 %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `shasx` instruction: returns Rd of `shasx Rd, op1, op2`.
            The asm statement is intentionally not volatile. */
__STATIC_FORCEINLINE uint32_t __SHASX(uint32_t op1, uint32_t op2)
{
  uint32_t rd;

  __ASM ("shasx %0, %1, %2" : "=r" (rd) : "r" (op1), "r" (op2) );
  return rd;
}
|
|
||||||
|
|
||||||
/** \brief  Wrapper for the `smlsdx` instruction: returns Rd of `smlsdx Rd, op1, op2, op3`. */
__STATIC_FORCEINLINE uint32_t __SMLSDX (uint32_t op1, uint32_t op2, uint32_t op3)
{
  uint32_t rd;

  __ASM volatile ("smlsdx %0, %1, %2, %3" : "=r" (rd) : "r" (op1), "r" (op2), "r" (op3) );
  return rd;
}
|
|
||||||
|
|
||||||
|
|
||||||
/* ########################## Core Instruction Access ######################### */
|
|
||||||
/**
  \brief   No Operation
  \details Expands to a volatile asm statement containing `nop`.
 */
#define __NOP()                             __ASM volatile ("nop")

/**
  \brief   Wait For Interrupt
  \details Expands to a volatile `wfi` asm statement with a "memory" clobber.
 */
#define __WFI()                             __ASM volatile ("wfi":::"memory")

/**
  \brief   Wait For Event
  \details Expands to a volatile `wfe` asm statement with a "memory" clobber.
 */
#define __WFE()                             __ASM volatile ("wfe":::"memory")

/**
  \brief   Send Event
  \details Expands to a volatile asm statement containing `sev`.
 */
#define __SEV()                             __ASM volatile ("sev")
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Instruction Synchronization Barrier
|
|
||||||
\details Instruction Synchronization Barrier flushes the pipeline in the processor,
|
|
||||||
so that all instructions following the ISB are fetched from cache or memory,
|
|
||||||
after the instruction has been completed.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __ISB(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("isb 0xF":::"memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Data Synchronization Barrier
|
|
||||||
\details Acts as a special kind of Data Memory Barrier.
|
|
||||||
It completes when all explicit memory accesses before this instruction complete.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __DSB(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("dsb 0xF":::"memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Data Memory Barrier
|
|
||||||
\details Ensures the apparent order of the explicit memory operations before
|
|
||||||
and after the instruction, without ensuring their completion.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __DMB(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("dmb 0xF":::"memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse byte order (32 bit)
|
|
||||||
\details Reverses the byte order in unsigned integer value. For example, 0x12345678 becomes 0x78563412.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __REV(uint32_t value)
|
|
||||||
{
|
|
||||||
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
|
|
||||||
return __builtin_bswap32(value);
|
|
||||||
#else
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
__ASM ("rev %0, %1" : "=r" (result) : "r" (value) );
|
|
||||||
return result;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse byte order (16 bit)
|
|
||||||
\details Reverses the byte order within each halfword of a word. For example, 0x12345678 becomes 0x34127856.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __REV16(uint32_t value)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__ASM ("rev16 %0, %1" : "=r" (result) : "r" (value));
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse byte order (16 bit)
|
|
||||||
\details Reverses the byte order in a 16-bit value and returns the signed 16-bit result. For example, 0x0080 becomes 0x8000.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE int16_t __REVSH(int16_t value)
|
|
||||||
{
|
|
||||||
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
|
|
||||||
return (int16_t)__builtin_bswap16(value);
|
|
||||||
#else
|
|
||||||
int16_t result;
|
|
||||||
|
|
||||||
__ASM ("revsh %0, %1" : "=r" (result) : "r" (value) );
|
|
||||||
return result;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Rotate Right in unsigned value (32 bit)
|
|
||||||
\details Rotate Right (immediate) provides the value of the contents of a register rotated by a variable number of bits.
|
|
||||||
\param [in] op1 Value to rotate
|
|
||||||
\param [in] op2 Number of Bits to rotate
|
|
||||||
\return Rotated value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __ROR(uint32_t op1, uint32_t op2)
|
|
||||||
{
|
|
||||||
op2 %= 32U;
|
|
||||||
if (op2 == 0U)
|
|
||||||
{
|
|
||||||
return op1;
|
|
||||||
}
|
|
||||||
return (op1 >> op2) | (op1 << (32U - op2));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
  \brief   Breakpoint
  \details Expands to a volatile `bkpt` asm statement; the argument is
           stringified into the instruction text.
  \param [in]    value  is ignored by the processor.
                 If required, a debugger can use it to store additional information about the breakpoint.
 */
#define __BKPT(value)                       __ASM volatile ("bkpt "#value)
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Reverse bit order of value
|
|
||||||
\details Reverses the bit order of the given value.
|
|
||||||
\param [in] value Value to reverse
|
|
||||||
\return Reversed value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __RBIT(uint32_t value)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__ASM ("rbit %0, %1" : "=r" (result) : "r" (value) );
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Count leading zeros
|
|
||||||
\param [in] value Value to count the leading zeros
|
|
||||||
\return number of leading zeros in value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint8_t __CLZ(uint32_t value)
|
|
||||||
{
|
|
||||||
/* Even though __builtin_clz produces a CLZ instruction on ARM, formally
|
|
||||||
__builtin_clz(0) is undefined behaviour, so handle this case specially.
|
|
||||||
This guarantees ARM-compatible results if happening to compile on a non-ARM
|
|
||||||
target, and ensures the compiler doesn't decide to activate any
|
|
||||||
optimisations using the logic "value was passed to __builtin_clz, so it
|
|
||||||
is non-zero".
|
|
||||||
ARM GCC 7.3 and possibly earlier will optimise this test away, leaving a
|
|
||||||
single CLZ instruction.
|
|
||||||
*/
|
|
||||||
if (value == 0U)
|
|
||||||
{
|
|
||||||
return 32U;
|
|
||||||
}
|
|
||||||
return __builtin_clz(value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief LDR Exclusive (8 bit)
|
|
||||||
\details Executes a exclusive LDR instruction for 8 bit value.
|
|
||||||
\param [in] ptr Pointer to data
|
|
||||||
\return value of type uint8_t at (*ptr)
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint8_t __LDREXB(volatile uint8_t *addr)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
|
|
||||||
__ASM volatile ("ldrexb %0, %1" : "=r" (result) : "Q" (*addr) );
|
|
||||||
#else
|
|
||||||
/* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
|
|
||||||
accepted by assembler. So has to use following less efficient pattern.
|
|
||||||
*/
|
|
||||||
__ASM volatile ("ldrexb %0, [%1]" : "=r" (result) : "r" (addr) : "memory" );
|
|
||||||
#endif
|
|
||||||
return ((uint8_t) result); /* Add explicit type cast here */
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief LDR Exclusive (16 bit)
|
|
||||||
\details Executes a exclusive LDR instruction for 16 bit values.
|
|
||||||
\param [in] ptr Pointer to data
|
|
||||||
\return value of type uint16_t at (*ptr)
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint16_t __LDREXH(volatile uint16_t *addr)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
#if (__GNUC__ > 4) || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)
|
|
||||||
__ASM volatile ("ldrexh %0, %1" : "=r" (result) : "Q" (*addr) );
|
|
||||||
#else
|
|
||||||
/* Prior to GCC 4.8, "Q" will be expanded to [rx, #0] which is not
|
|
||||||
accepted by assembler. So has to use following less efficient pattern.
|
|
||||||
*/
|
|
||||||
__ASM volatile ("ldrexh %0, [%1]" : "=r" (result) : "r" (addr) : "memory" );
|
|
||||||
#endif
|
|
||||||
return ((uint16_t) result); /* Add explicit type cast here */
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief LDR Exclusive (32 bit)
|
|
||||||
\details Executes a exclusive LDR instruction for 32 bit values.
|
|
||||||
\param [in] ptr Pointer to data
|
|
||||||
\return value of type uint32_t at (*ptr)
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __LDREXW(volatile uint32_t *addr)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
__ASM volatile ("ldrex %0, %1" : "=r" (result) : "Q" (*addr) );
|
|
||||||
return(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief STR Exclusive (8 bit)
|
|
||||||
\details Executes a exclusive STR instruction for 8 bit values.
|
|
||||||
\param [in] value Value to store
|
|
||||||
\param [in] ptr Pointer to location
|
|
||||||
\return 0 Function succeeded
|
|
||||||
\return 1 Function failed
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __STREXB(uint8_t value, volatile uint8_t *addr)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
__ASM volatile ("strexb %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) );
|
|
||||||
return(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief STR Exclusive (16 bit)
|
|
||||||
\details Executes a exclusive STR instruction for 16 bit values.
|
|
||||||
\param [in] value Value to store
|
|
||||||
\param [in] ptr Pointer to location
|
|
||||||
\return 0 Function succeeded
|
|
||||||
\return 1 Function failed
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __STREXH(uint16_t value, volatile uint16_t *addr)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
__ASM volatile ("strexh %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" ((uint32_t)value) );
|
|
||||||
return(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief STR Exclusive (32 bit)
|
|
||||||
\details Executes a exclusive STR instruction for 32 bit values.
|
|
||||||
\param [in] value Value to store
|
|
||||||
\param [in] ptr Pointer to location
|
|
||||||
\return 0 Function succeeded
|
|
||||||
\return 1 Function failed
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __STREXW(uint32_t value, volatile uint32_t *addr)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
__ASM volatile ("strex %0, %2, %1" : "=&r" (result), "=Q" (*addr) : "r" (value) );
|
|
||||||
return(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Remove the exclusive lock
|
|
||||||
\details Removes the exclusive lock which is created by LDREX.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __CLREX(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("clrex" ::: "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
  \brief   Signed Saturate
  \details Saturates a signed value using the `ssat` instruction. Implemented
           as a GNU statement expression so the macro yields a value; ARG1 is
           evaluated exactly once. ARG2 is passed with the "I" constraint.
  \param [in]  ARG1  Value to be saturated
  \param [in]  ARG2  Bit position to saturate to (1..32)
  \return             Saturated value
 */
#define __SSAT(ARG1, ARG2) \
__extension__ \
({                          \
  int32_t __RES, __ARG1 = (ARG1); \
  __ASM volatile ("ssat %0, %1, %2" : "=r" (__RES) :  "I" (ARG2), "r" (__ARG1) : "cc" ); \
  __RES; \
 })
|
|
||||||
|
|
||||||
|
|
||||||
/**
  \brief   Unsigned Saturate
  \details Saturates an unsigned value using the `usat` instruction.
           Implemented as a GNU statement expression so the macro yields a
           value; ARG1 is evaluated exactly once. ARG2 is passed with the "I"
           constraint.
  \param [in]  ARG1  Value to be saturated
  \param [in]  ARG2  Bit position to saturate to (0..31)
  \return             Saturated value
 */
#define __USAT(ARG1, ARG2) \
__extension__ \
({                          \
  uint32_t __RES, __ARG1 = (ARG1); \
  __ASM volatile ("usat %0, %1, %2" : "=r" (__RES) :  "I" (ARG2), "r" (__ARG1) : "cc" ); \
  __RES; \
 })
|
|
||||||
|
|
||||||
/* ########################### Core Function Access ########################### */
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Enable IRQ Interrupts
|
|
||||||
\details Enables IRQ interrupts by clearing the I-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __enable_irq(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("cpsie i" : : : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Disable IRQ Interrupts
|
|
||||||
\details Disables IRQ interrupts by setting the I-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __disable_irq(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("cpsid i" : : : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Enable FIQ
|
|
||||||
\details Enables FIQ interrupts by clearing the F-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __enable_fault_irq(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("cpsie f" : : : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Disable FIQ
|
|
||||||
\details Disables FIQ interrupts by setting the F-bit in the CPSR.
|
|
||||||
Can only be executed in Privileged modes.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __disable_fault_irq(void)
|
|
||||||
{
|
|
||||||
__ASM volatile ("cpsid f" : : : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Get FPSCR
|
|
||||||
\details Returns the current value of the Floating Point Status/Control register.
|
|
||||||
\return Floating Point Status/Control register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_FPSCR(void)
|
|
||||||
{
|
|
||||||
#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
|
|
||||||
(defined (__FPU_USED ) && (__FPU_USED == 1U)) )
|
|
||||||
#if __has_builtin(__builtin_arm_get_fpscr)
|
|
||||||
// Re-enable using built-in when GCC has been fixed
|
|
||||||
// || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)
|
|
||||||
/* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */
|
|
||||||
return __builtin_arm_get_fpscr();
|
|
||||||
#else
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
__ASM volatile ("VMRS %0, fpscr" : "=r" (result) );
|
|
||||||
return(result);
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
return(0U);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
\brief Set FPSCR
|
|
||||||
\details Assigns the given value to the Floating Point Status/Control register.
|
|
||||||
\param [in] fpscr Floating Point Status/Control value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_FPSCR(uint32_t fpscr)
|
|
||||||
{
|
|
||||||
#if ((defined (__FPU_PRESENT) && (__FPU_PRESENT == 1U)) && \
|
|
||||||
(defined (__FPU_USED ) && (__FPU_USED == 1U)) )
|
|
||||||
#if __has_builtin(__builtin_arm_set_fpscr)
|
|
||||||
// Re-enable using built-in when GCC has been fixed
|
|
||||||
// || (__GNUC__ > 7) || (__GNUC__ == 7 && __GNUC_MINOR__ >= 2)
|
|
||||||
/* see https://gcc.gnu.org/ml/gcc-patches/2017-04/msg00443.html */
|
|
||||||
__builtin_arm_set_fpscr(fpscr);
|
|
||||||
#else
|
|
||||||
__ASM volatile ("VMSR fpscr, %0" : : "r" (fpscr) : "vfpcc", "memory");
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
(void)fpscr;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get CPSR Register
|
|
||||||
\return CPSR Register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_CPSR(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__ASM volatile("MRS %0, cpsr" : "=r" (result) );
|
|
||||||
return(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set CPSR Register
|
|
||||||
\param [in] cpsr CPSR value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_CPSR(uint32_t cpsr)
|
|
||||||
{
|
|
||||||
__ASM volatile ("MSR cpsr, %0" : : "r" (cpsr) : "cc", "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get Mode
|
|
||||||
\return Processor Mode
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_mode(void)
|
|
||||||
{
|
|
||||||
return (__get_CPSR() & 0x1FU);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set Mode
|
|
||||||
\param [in] mode Mode value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_mode(uint32_t mode)
|
|
||||||
{
|
|
||||||
__ASM volatile("MSR cpsr_c, %0" : : "r" (mode) : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get Stack Pointer
|
|
||||||
\return Stack Pointer value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_SP(void)
|
|
||||||
{
|
|
||||||
uint32_t result;
|
|
||||||
__ASM volatile("MOV %0, sp" : "=r" (result) : : "memory");
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set Stack Pointer
|
|
||||||
\param [in] stack Stack Pointer value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_SP(uint32_t stack)
|
|
||||||
{
|
|
||||||
__ASM volatile("MOV sp, %0" : : "r" (stack) : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get USR/SYS Stack Pointer
|
|
||||||
\return USR/SYS Stack Pointer value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_SP_usr(void)
|
|
||||||
{
|
|
||||||
uint32_t cpsr = __get_CPSR();
|
|
||||||
uint32_t result;
|
|
||||||
__ASM volatile(
|
|
||||||
"CPS #0x1F \n"
|
|
||||||
"MOV %0, sp " : "=r"(result) : : "memory"
|
|
||||||
);
|
|
||||||
__set_CPSR(cpsr);
|
|
||||||
__ISB();
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set USR/SYS Stack Pointer
|
|
||||||
\param [in] topOfProcStack USR/SYS Stack Pointer value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_SP_usr(uint32_t topOfProcStack)
|
|
||||||
{
|
|
||||||
uint32_t cpsr = __get_CPSR();
|
|
||||||
__ASM volatile(
|
|
||||||
"CPS #0x1F \n"
|
|
||||||
"MOV sp, %0 " : : "r" (topOfProcStack) : "memory"
|
|
||||||
);
|
|
||||||
__set_CPSR(cpsr);
|
|
||||||
__ISB();
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Get FPEXC
|
|
||||||
\return Floating Point Exception Control register value
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t __get_FPEXC(void)
|
|
||||||
{
|
|
||||||
#if (__FPU_PRESENT == 1)
|
|
||||||
uint32_t result;
|
|
||||||
__ASM volatile("VMRS %0, fpexc" : "=r" (result) : : "memory");
|
|
||||||
return(result);
|
|
||||||
#else
|
|
||||||
return(0);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/** \brief Set FPEXC
|
|
||||||
\param [in] fpexc Floating Point Exception Control value to set
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void __set_FPEXC(uint32_t fpexc)
|
|
||||||
{
|
|
||||||
#if (__FPU_PRESENT == 1)
|
|
||||||
__ASM volatile ("VMSR fpexc, %0" : : "r" (fpexc) : "memory");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Include common core functions to access Coprocessor 15 registers
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define __get_CP(cp, op1, Rt, CRn, CRm, op2) __ASM volatile("MRC p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : "=r" (Rt) : : "memory" )
|
|
||||||
#define __set_CP(cp, op1, Rt, CRn, CRm, op2) __ASM volatile("MCR p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : : "r" (Rt) : "memory" )
|
|
||||||
#define __get_CP64(cp, op1, Rt, CRm) __ASM volatile("MRRC p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : "=r" (Rt) : : "memory" )
|
|
||||||
#define __set_CP64(cp, op1, Rt, CRm) __ASM volatile("MCRR p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : : "r" (Rt) : "memory" )
|
|
||||||
|
|
||||||
#include "cmsis_cp15.h"
|
|
||||||
|
|
||||||
/** \brief Enable Floating Point Unit
|
|
||||||
|
|
||||||
Critical section, called from undef handler, so systick is disabled
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void __FPU_Enable(void)
|
|
||||||
{
|
|
||||||
__ASM volatile(
|
|
||||||
//Permit access to VFP/NEON, registers by modifying CPACR
|
|
||||||
" MRC p15,0,R1,c1,c0,2 \n"
|
|
||||||
" ORR R1,R1,#0x00F00000 \n"
|
|
||||||
" MCR p15,0,R1,c1,c0,2 \n"
|
|
||||||
|
|
||||||
//Ensure that subsequent instructions occur in the context of VFP/NEON access permitted
|
|
||||||
" ISB \n"
|
|
||||||
|
|
||||||
//Enable VFP/NEON
|
|
||||||
" VMRS R1,FPEXC \n"
|
|
||||||
" ORR R1,R1,#0x40000000 \n"
|
|
||||||
" VMSR FPEXC,R1 \n"
|
|
||||||
|
|
||||||
//Initialise VFP/NEON registers to 0
|
|
||||||
" MOV R2,#0 \n"
|
|
||||||
|
|
||||||
//Initialise D16 registers to 0
|
|
||||||
" VMOV D0, R2,R2 \n"
|
|
||||||
" VMOV D1, R2,R2 \n"
|
|
||||||
" VMOV D2, R2,R2 \n"
|
|
||||||
" VMOV D3, R2,R2 \n"
|
|
||||||
" VMOV D4, R2,R2 \n"
|
|
||||||
" VMOV D5, R2,R2 \n"
|
|
||||||
" VMOV D6, R2,R2 \n"
|
|
||||||
" VMOV D7, R2,R2 \n"
|
|
||||||
" VMOV D8, R2,R2 \n"
|
|
||||||
" VMOV D9, R2,R2 \n"
|
|
||||||
" VMOV D10,R2,R2 \n"
|
|
||||||
" VMOV D11,R2,R2 \n"
|
|
||||||
" VMOV D12,R2,R2 \n"
|
|
||||||
" VMOV D13,R2,R2 \n"
|
|
||||||
" VMOV D14,R2,R2 \n"
|
|
||||||
" VMOV D15,R2,R2 \n"
|
|
||||||
|
|
||||||
#if (defined(__ARM_NEON) && (__ARM_NEON == 1))
|
|
||||||
//Initialise D32 registers to 0
|
|
||||||
" VMOV D16,R2,R2 \n"
|
|
||||||
" VMOV D17,R2,R2 \n"
|
|
||||||
" VMOV D18,R2,R2 \n"
|
|
||||||
" VMOV D19,R2,R2 \n"
|
|
||||||
" VMOV D20,R2,R2 \n"
|
|
||||||
" VMOV D21,R2,R2 \n"
|
|
||||||
" VMOV D22,R2,R2 \n"
|
|
||||||
" VMOV D23,R2,R2 \n"
|
|
||||||
" VMOV D24,R2,R2 \n"
|
|
||||||
" VMOV D25,R2,R2 \n"
|
|
||||||
" VMOV D26,R2,R2 \n"
|
|
||||||
" VMOV D27,R2,R2 \n"
|
|
||||||
" VMOV D28,R2,R2 \n"
|
|
||||||
" VMOV D29,R2,R2 \n"
|
|
||||||
" VMOV D30,R2,R2 \n"
|
|
||||||
" VMOV D31,R2,R2 \n"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//Initialise FPSCR to a known state
|
|
||||||
" VMRS R1,FPSCR \n"
|
|
||||||
" LDR R2,=0x00086060 \n" //Mask off all bits that do not have to be preserved. Non-preserved bits can/should be zero.
|
|
||||||
" AND R1,R1,R2 \n"
|
|
||||||
" VMSR FPSCR,R1 "
|
|
||||||
: : : "cc", "r1", "r2"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#pragma GCC diagnostic pop
|
|
||||||
|
|
||||||
#endif /* __CMSIS_GCC_H */
|
|
||||||
@ -1,573 +0,0 @@
|
|||||||
/**************************************************************************//**
|
|
||||||
* @file cmsis_iccarm.h
|
|
||||||
* @brief CMSIS compiler ICCARM (IAR Compiler for Arm) header file
|
|
||||||
* @version V5.0.7
|
|
||||||
* @date 15. May 2019
|
|
||||||
******************************************************************************/
|
|
||||||
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Copyright (c) 2017-2018 IAR Systems
|
|
||||||
// Copyright (c) 2018-2019 Arm Limited
|
|
||||||
//
|
|
||||||
// SPDX-License-Identifier: Apache-2.0
|
|
||||||
//
|
|
||||||
// Licensed under the Apache License, Version 2.0 (the "License")
|
|
||||||
// you may not use this file except in compliance with the License.
|
|
||||||
// You may obtain a copy of the License at
|
|
||||||
// http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
//
|
|
||||||
// Unless required by applicable law or agreed to in writing, software
|
|
||||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
// See the License for the specific language governing permissions and
|
|
||||||
// limitations under the License.
|
|
||||||
//
|
|
||||||
//------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __CMSIS_ICCARM_H__
|
|
||||||
#define __CMSIS_ICCARM_H__
|
|
||||||
|
|
||||||
#ifndef __ICCARM__
|
|
||||||
#error This file should only be compiled by ICCARM
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#pragma system_include
|
|
||||||
|
|
||||||
#define __IAR_FT _Pragma("inline=forced") __intrinsic
|
|
||||||
|
|
||||||
#if (__VER__ >= 8000000)
|
|
||||||
#define __ICCARM_V8 1
|
|
||||||
#else
|
|
||||||
#define __ICCARM_V8 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#pragma language=extended
|
|
||||||
|
|
||||||
#ifndef __ALIGNED
|
|
||||||
#if __ICCARM_V8
|
|
||||||
#define __ALIGNED(x) __attribute__((aligned(x)))
|
|
||||||
#elif (__VER__ >= 7080000)
|
|
||||||
/* Needs IAR language extensions */
|
|
||||||
#define __ALIGNED(x) __attribute__((aligned(x)))
|
|
||||||
#else
|
|
||||||
#warning No compiler specific solution for __ALIGNED.__ALIGNED is ignored.
|
|
||||||
#define __ALIGNED(x)
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/* Define compiler macros for CPU architecture, used in CMSIS 5.
|
|
||||||
*/
|
|
||||||
#if __ARM_ARCH_7A__
|
|
||||||
/* Macro already defined */
|
|
||||||
#else
|
|
||||||
#if defined(__ARM7A__)
|
|
||||||
#define __ARM_ARCH_7A__ 1
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __ASM
|
|
||||||
#define __ASM __asm
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __COMPILER_BARRIER
|
|
||||||
#define __COMPILER_BARRIER() __ASM volatile("":::"memory")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __INLINE
|
|
||||||
#define __INLINE inline
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __NO_RETURN
|
|
||||||
#if __ICCARM_V8
|
|
||||||
#define __NO_RETURN __attribute__((__noreturn__))
|
|
||||||
#else
|
|
||||||
#define __NO_RETURN _Pragma("object_attribute=__noreturn")
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __PACKED
|
|
||||||
/* Needs IAR language extensions */
|
|
||||||
#if __ICCARM_V8
|
|
||||||
#define __PACKED __attribute__((packed, aligned(1)))
|
|
||||||
#else
|
|
||||||
#define __PACKED __packed
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __PACKED_STRUCT
|
|
||||||
/* Needs IAR language extensions */
|
|
||||||
#if __ICCARM_V8
|
|
||||||
#define __PACKED_STRUCT struct __attribute__((packed, aligned(1)))
|
|
||||||
#else
|
|
||||||
#define __PACKED_STRUCT __packed struct
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __PACKED_UNION
|
|
||||||
/* Needs IAR language extensions */
|
|
||||||
#if __ICCARM_V8
|
|
||||||
#define __PACKED_UNION union __attribute__((packed, aligned(1)))
|
|
||||||
#else
|
|
||||||
#define __PACKED_UNION __packed union
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __RESTRICT
|
|
||||||
#if __ICCARM_V8
|
|
||||||
#define __RESTRICT __restrict
|
|
||||||
#else
|
|
||||||
/* Needs IAR language extensions */
|
|
||||||
#define __RESTRICT restrict
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __STATIC_INLINE
|
|
||||||
#define __STATIC_INLINE static inline
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __FORCEINLINE
|
|
||||||
#define __FORCEINLINE _Pragma("inline=forced")
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __STATIC_FORCEINLINE
|
|
||||||
#define __STATIC_FORCEINLINE __FORCEINLINE __STATIC_INLINE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef CMSIS_DEPRECATED
|
|
||||||
#define CMSIS_DEPRECATED __attribute__((deprecated))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __UNALIGNED_UINT16_READ
|
|
||||||
#pragma language=save
|
|
||||||
#pragma language=extended
|
|
||||||
/**
  \brief   Read a 16-bit value through a __packed pointer
  \details Helper behind the __UNALIGNED_UINT16_READ macro: the address is
           accessed as a __packed uint16_t.
  \param [in]    ptr  Address to read from
  \return             16-bit value stored at ptr
 */
__IAR_FT uint16_t __iar_uint16_read(void const *ptr)
{
  return *(__packed uint16_t*)(ptr);
}
|
|
||||||
#pragma language=restore
|
|
||||||
#define __UNALIGNED_UINT16_READ(PTR) __iar_uint16_read(PTR)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __UNALIGNED_UINT16_WRITE
|
|
||||||
#pragma language=save
|
|
||||||
#pragma language=extended
|
|
||||||
__IAR_FT void __iar_uint16_write(void const *ptr, uint16_t val)
|
|
||||||
{
|
|
||||||
*(__packed uint16_t*)(ptr) = val;;
|
|
||||||
}
|
|
||||||
#pragma language=restore
|
|
||||||
#define __UNALIGNED_UINT16_WRITE(PTR,VAL) __iar_uint16_write(PTR,VAL)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __UNALIGNED_UINT32_READ
|
|
||||||
#pragma language=save
|
|
||||||
#pragma language=extended
|
|
||||||
/**
  \brief   Read a 32-bit value through a __packed pointer
  \details Helper behind the __UNALIGNED_UINT32_READ macro: the address is
           accessed as a __packed uint32_t.
  \param [in]    ptr  Address to read from
  \return             32-bit value stored at ptr
 */
__IAR_FT uint32_t __iar_uint32_read(void const *ptr)
{
  return *(__packed uint32_t*)(ptr);
}
|
|
||||||
#pragma language=restore
|
|
||||||
#define __UNALIGNED_UINT32_READ(PTR) __iar_uint32_read(PTR)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __UNALIGNED_UINT32_WRITE
|
|
||||||
#pragma language=save
|
|
||||||
#pragma language=extended
|
|
||||||
__IAR_FT void __iar_uint32_write(void const *ptr, uint32_t val)
|
|
||||||
{
|
|
||||||
*(__packed uint32_t*)(ptr) = val;;
|
|
||||||
}
|
|
||||||
#pragma language=restore
|
|
||||||
#define __UNALIGNED_UINT32_WRITE(PTR,VAL) __iar_uint32_write(PTR,VAL)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
#ifndef __UNALIGNED_UINT32 /* deprecated */
|
|
||||||
#pragma language=save
|
|
||||||
#pragma language=extended
|
|
||||||
__packed struct __iar_u32 { uint32_t v; };
|
|
||||||
#pragma language=restore
|
|
||||||
#define __UNALIGNED_UINT32(PTR) (((struct __iar_u32 *)(PTR))->v)
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __USED
|
|
||||||
#if __ICCARM_V8
|
|
||||||
#define __USED __attribute__((used))
|
|
||||||
#else
|
|
||||||
#define __USED _Pragma("__root")
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef __WEAK
|
|
||||||
#if __ICCARM_V8
|
|
||||||
#define __WEAK __attribute__((weak))
|
|
||||||
#else
|
|
||||||
#define __WEAK _Pragma("__weak")
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef __ICCARM_INTRINSICS_VERSION__
|
|
||||||
#define __ICCARM_INTRINSICS_VERSION__ 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if __ICCARM_INTRINSICS_VERSION__ == 2
|
|
||||||
|
|
||||||
#if defined(__CLZ)
|
|
||||||
#undef __CLZ
|
|
||||||
#endif
|
|
||||||
#if defined(__REVSH)
|
|
||||||
#undef __REVSH
|
|
||||||
#endif
|
|
||||||
#if defined(__RBIT)
|
|
||||||
#undef __RBIT
|
|
||||||
#endif
|
|
||||||
#if defined(__SSAT)
|
|
||||||
#undef __SSAT
|
|
||||||
#endif
|
|
||||||
#if defined(__USAT)
|
|
||||||
#undef __USAT
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "iccarm_builtin.h"
|
|
||||||
|
|
||||||
#define __enable_irq __iar_builtin_enable_interrupt
|
|
||||||
#define __disable_irq __iar_builtin_disable_interrupt
|
|
||||||
#define __enable_fault_irq __iar_builtin_enable_fiq
|
|
||||||
#define __disable_fault_irq __iar_builtin_disable_fiq
|
|
||||||
#define __arm_rsr __iar_builtin_rsr
|
|
||||||
#define __arm_wsr __iar_builtin_wsr
|
|
||||||
|
|
||||||
#if __FPU_PRESENT
|
|
||||||
#define __get_FPSCR() (__arm_rsr("FPSCR"))
|
|
||||||
#else
|
|
||||||
#define __get_FPSCR() ( 0 )
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define __set_FPSCR(VALUE) (__arm_wsr("FPSCR", VALUE))
|
|
||||||
|
|
||||||
#define __get_CPSR() (__arm_rsr("CPSR"))
|
|
||||||
#define __get_mode() (__get_CPSR() & 0x1FU)
|
|
||||||
|
|
||||||
#define __set_CPSR(VALUE) (__arm_wsr("CPSR", (VALUE)))
|
|
||||||
#define __set_mode(VALUE) (__arm_wsr("CPSR_c", (VALUE)))
|
|
||||||
|
|
||||||
|
|
||||||
#define __get_FPEXC() (__arm_rsr("FPEXC"))
|
|
||||||
#define __set_FPEXC(VALUE) (__arm_wsr("FPEXC", VALUE))
|
|
||||||
|
|
||||||
#define __get_CP(cp, op1, RT, CRn, CRm, op2) \
|
|
||||||
((RT) = __arm_rsr("p" # cp ":" # op1 ":c" # CRn ":c" # CRm ":" # op2))
|
|
||||||
|
|
||||||
#define __set_CP(cp, op1, RT, CRn, CRm, op2) \
|
|
||||||
(__arm_wsr("p" # cp ":" # op1 ":c" # CRn ":c" # CRm ":" # op2, (RT)))
|
|
||||||
|
|
||||||
#define __get_CP64(cp, op1, Rt, CRm) \
|
|
||||||
__ASM volatile("MRRC p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : "=r" (Rt) : : "memory" )
|
|
||||||
|
|
||||||
#define __set_CP64(cp, op1, Rt, CRm) \
|
|
||||||
__ASM volatile("MCRR p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : : "r" (Rt) : "memory" )
|
|
||||||
|
|
||||||
#include "cmsis_cp15.h"
|
|
||||||
|
|
||||||
#define __NOP __iar_builtin_no_operation
|
|
||||||
|
|
||||||
#define __CLZ __iar_builtin_CLZ
|
|
||||||
#define __CLREX __iar_builtin_CLREX
|
|
||||||
|
|
||||||
#define __DMB __iar_builtin_DMB
|
|
||||||
#define __DSB __iar_builtin_DSB
|
|
||||||
#define __ISB __iar_builtin_ISB
|
|
||||||
|
|
||||||
#define __LDREXB __iar_builtin_LDREXB
|
|
||||||
#define __LDREXH __iar_builtin_LDREXH
|
|
||||||
#define __LDREXW __iar_builtin_LDREX
|
|
||||||
|
|
||||||
#define __RBIT __iar_builtin_RBIT
|
|
||||||
#define __REV __iar_builtin_REV
|
|
||||||
#define __REV16 __iar_builtin_REV16
|
|
||||||
|
|
||||||
/**
  \brief   REVSH wrapper with a 16-bit signed result
  \details Forwards to __iar_builtin_REVSH and narrows the builtin's result
           to int16_t.
  \param [in]    val  Value to pass to the builtin
  \return             Builtin result cast to int16_t
 */
__IAR_FT int16_t __REVSH(int16_t val)
{
  const int16_t reversed = (int16_t) __iar_builtin_REVSH(val);

  return reversed;
}
|
|
||||||
|
|
||||||
#define __ROR __iar_builtin_ROR
|
|
||||||
#define __RRX __iar_builtin_RRX
|
|
||||||
|
|
||||||
#define __SEV __iar_builtin_SEV
|
|
||||||
|
|
||||||
#define __SSAT __iar_builtin_SSAT
|
|
||||||
|
|
||||||
#define __STREXB __iar_builtin_STREXB
|
|
||||||
#define __STREXH __iar_builtin_STREXH
|
|
||||||
#define __STREXW __iar_builtin_STREX
|
|
||||||
|
|
||||||
#define __USAT __iar_builtin_USAT
|
|
||||||
|
|
||||||
#define __WFE __iar_builtin_WFE
|
|
||||||
#define __WFI __iar_builtin_WFI
|
|
||||||
|
|
||||||
#define __SADD8 __iar_builtin_SADD8
|
|
||||||
#define __QADD8 __iar_builtin_QADD8
|
|
||||||
#define __SHADD8 __iar_builtin_SHADD8
|
|
||||||
#define __UADD8 __iar_builtin_UADD8
|
|
||||||
#define __UQADD8 __iar_builtin_UQADD8
|
|
||||||
#define __UHADD8 __iar_builtin_UHADD8
|
|
||||||
#define __SSUB8 __iar_builtin_SSUB8
|
|
||||||
#define __QSUB8 __iar_builtin_QSUB8
|
|
||||||
#define __SHSUB8 __iar_builtin_SHSUB8
|
|
||||||
#define __USUB8 __iar_builtin_USUB8
|
|
||||||
#define __UQSUB8 __iar_builtin_UQSUB8
|
|
||||||
#define __UHSUB8 __iar_builtin_UHSUB8
|
|
||||||
#define __SADD16 __iar_builtin_SADD16
|
|
||||||
#define __QADD16 __iar_builtin_QADD16
|
|
||||||
#define __SHADD16 __iar_builtin_SHADD16
|
|
||||||
#define __UADD16 __iar_builtin_UADD16
|
|
||||||
#define __UQADD16 __iar_builtin_UQADD16
|
|
||||||
#define __UHADD16 __iar_builtin_UHADD16
|
|
||||||
#define __SSUB16 __iar_builtin_SSUB16
|
|
||||||
#define __QSUB16 __iar_builtin_QSUB16
|
|
||||||
#define __SHSUB16 __iar_builtin_SHSUB16
|
|
||||||
#define __USUB16 __iar_builtin_USUB16
|
|
||||||
#define __UQSUB16 __iar_builtin_UQSUB16
|
|
||||||
#define __UHSUB16 __iar_builtin_UHSUB16
|
|
||||||
#define __SASX __iar_builtin_SASX
|
|
||||||
#define __QASX __iar_builtin_QASX
|
|
||||||
#define __SHASX __iar_builtin_SHASX
|
|
||||||
#define __UASX __iar_builtin_UASX
|
|
||||||
#define __UQASX __iar_builtin_UQASX
|
|
||||||
#define __UHASX __iar_builtin_UHASX
|
|
||||||
#define __SSAX __iar_builtin_SSAX
|
|
||||||
#define __QSAX __iar_builtin_QSAX
|
|
||||||
#define __SHSAX __iar_builtin_SHSAX
|
|
||||||
#define __USAX __iar_builtin_USAX
|
|
||||||
#define __UQSAX __iar_builtin_UQSAX
|
|
||||||
#define __UHSAX __iar_builtin_UHSAX
|
|
||||||
#define __USAD8 __iar_builtin_USAD8
|
|
||||||
#define __USADA8 __iar_builtin_USADA8
|
|
||||||
#define __SSAT16 __iar_builtin_SSAT16
|
|
||||||
#define __USAT16 __iar_builtin_USAT16
|
|
||||||
#define __UXTB16 __iar_builtin_UXTB16
|
|
||||||
#define __UXTAB16 __iar_builtin_UXTAB16
|
|
||||||
#define __SXTB16 __iar_builtin_SXTB16
|
|
||||||
#define __SXTAB16 __iar_builtin_SXTAB16
|
|
||||||
#define __SMUAD __iar_builtin_SMUAD
|
|
||||||
#define __SMUADX __iar_builtin_SMUADX
|
|
||||||
#define __SMMLA __iar_builtin_SMMLA
|
|
||||||
#define __SMLAD __iar_builtin_SMLAD
|
|
||||||
#define __SMLADX __iar_builtin_SMLADX
|
|
||||||
#define __SMLALD __iar_builtin_SMLALD
|
|
||||||
#define __SMLALDX __iar_builtin_SMLALDX
|
|
||||||
#define __SMUSD __iar_builtin_SMUSD
|
|
||||||
#define __SMUSDX __iar_builtin_SMUSDX
|
|
||||||
#define __SMLSD __iar_builtin_SMLSD
|
|
||||||
#define __SMLSDX __iar_builtin_SMLSDX
|
|
||||||
#define __SMLSLD __iar_builtin_SMLSLD
|
|
||||||
#define __SMLSLDX __iar_builtin_SMLSLDX
|
|
||||||
#define __SEL __iar_builtin_SEL
|
|
||||||
#define __QADD __iar_builtin_QADD
|
|
||||||
#define __QSUB __iar_builtin_QSUB
|
|
||||||
#define __PKHBT __iar_builtin_PKHBT
|
|
||||||
#define __PKHTB __iar_builtin_PKHTB
|
|
||||||
|
|
||||||
#else /* __ICCARM_INTRINSICS_VERSION__ == 2 */
|
|
||||||
|
|
||||||
#if !__FPU_PRESENT
|
|
||||||
#define __get_FPSCR __cmsis_iar_get_FPSR_not_active
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __INTRINSICS_INCLUDED
|
|
||||||
#error intrinsics.h is already included previously!
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <intrinsics.h>
|
|
||||||
|
|
||||||
#if !__FPU_PRESENT
|
|
||||||
#define __get_FPSCR() (0)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#pragma diag_suppress=Pe940
|
|
||||||
#pragma diag_suppress=Pe177
|
|
||||||
|
|
||||||
#define __enable_irq __enable_interrupt
|
|
||||||
#define __disable_irq __disable_interrupt
|
|
||||||
#define __enable_fault_irq __enable_fiq
|
|
||||||
#define __disable_fault_irq __disable_fiq
|
|
||||||
#define __NOP __no_operation
|
|
||||||
|
|
||||||
#define __get_xPSR __get_PSR
|
|
||||||
|
|
||||||
__IAR_FT void __set_mode(uint32_t mode)
|
|
||||||
{
|
|
||||||
__ASM volatile("MSR cpsr_c, %0" : : "r" (mode) : "memory");
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
  \brief   LDREX wrapper for 32-bit values
  \details Forwards to the __LDREX intrinsic, casting the pointer to the
           unsigned long * type that intrinsic is called with here.
  \param [in]    ptr  Pointer to the data
  \return             Value returned by __LDREX
 */
__IAR_FT uint32_t __LDREXW(uint32_t volatile *ptr)
{
  unsigned long *const word_ptr = (unsigned long *)ptr;

  return __LDREX(word_ptr);
}
|
|
||||||
|
|
||||||
/**
  \brief   STREX wrapper for 32-bit values
  \details Forwards to the __STREX intrinsic, casting the pointer to the
           unsigned long * type that intrinsic is called with here.
  \param [in]    value  Value to store
  \param [in]    ptr    Pointer to the location
  \return               Status value returned by __STREX
 */
__IAR_FT uint32_t __STREXW(uint32_t value, uint32_t volatile *ptr)
{
  unsigned long *const word_ptr = (unsigned long *)ptr;

  return __STREX(value, word_ptr);
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
  \brief   Rotate Right with Extend (32 bit)
  \details Executes a single RRX instruction on the given value.
  \param [in]    value  Value to rotate
  \return               Result of the RRX instruction
 */
__IAR_FT uint32_t __RRX(uint32_t value)
{
  uint32_t rotated;

  __ASM("RRX %0, %1" : "=r"(rotated) : "r" (value) : "cc");
  return rotated;
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
  \brief   Rotate Right in unsigned value (32 bit)
  \details Rotates op1 right by op2 bit positions. The count is first reduced
           modulo 32, so a count of 0 (or any multiple of 32) returns op1
           unchanged. Without that guard the expression would shift a 32-bit
           value by 32 bits, which is undefined behaviour in C.
  \param [in]    op1  Value to rotate
  \param [in]    op2  Number of bits to rotate
  \return             Rotated value
 */
__IAR_FT uint32_t __ROR(uint32_t op1, uint32_t op2)
{
  op2 %= 32U;
  if (op2 == 0U)
  {
    /* Nothing to rotate; also avoids the out-of-range shift below. */
    return op1;
  }
  return (op1 >> op2) | (op1 << (32U - op2));
}
|
|
||||||
|
|
||||||
/**
  \brief   Get FPEXC
  \details Reads FPEXC with a VMRS instruction when __FPU_PRESENT is 1;
           otherwise no register access is made and 0 is returned.
  \return  Floating Point Exception Control register value
 */
__IAR_FT uint32_t __get_FPEXC(void)
{
#if (__FPU_PRESENT == 1)
  uint32_t fpexc_value;

  __ASM volatile("VMRS %0, fpexc" : "=r" (fpexc_value) : : "memory");
  return fpexc_value;
#else
  return 0;
#endif
}
|
|
||||||
|
|
||||||
/**
  \brief   Set FPEXC
  \details Writes FPEXC with a VMSR instruction when __FPU_PRESENT is 1;
           otherwise the function body is empty.
  \param [in]    fpexc  Floating Point Exception Control value to set
 */
__IAR_FT void __set_FPEXC(uint32_t fpexc)
{
#if (__FPU_PRESENT == 1)
  __ASM volatile ("VMSR fpexc, %0" : : "r" (fpexc) : "memory");
#endif
}
|
|
||||||
|
|
||||||
|
|
||||||
#define __get_CP(cp, op1, Rt, CRn, CRm, op2) \
|
|
||||||
__ASM volatile("MRC p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : "=r" (Rt) : : "memory" )
|
|
||||||
#define __set_CP(cp, op1, Rt, CRn, CRm, op2) \
|
|
||||||
__ASM volatile("MCR p" # cp ", " # op1 ", %0, c" # CRn ", c" # CRm ", " # op2 : : "r" (Rt) : "memory" )
|
|
||||||
#define __get_CP64(cp, op1, Rt, CRm) \
|
|
||||||
__ASM volatile("MRRC p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : "=r" (Rt) : : "memory" )
|
|
||||||
#define __set_CP64(cp, op1, Rt, CRm) \
|
|
||||||
__ASM volatile("MCRR p" # cp ", " # op1 ", %Q0, %R0, c" # CRm : : "r" (Rt) : "memory" )
|
|
||||||
|
|
||||||
#include "cmsis_cp15.h"
|
|
||||||
|
|
||||||
#endif /* __ICCARM_INTRINSICS_VERSION__ == 2 */
|
|
||||||
|
|
||||||
#define __BKPT(value) __asm volatile ("BKPT %0" : : "i"(value))
|
|
||||||
|
|
||||||
|
|
||||||
/**
  \brief   Get USR/SYS Stack Pointer
  \details Saves the CPSR, changes to mode 0x1F with CPS, reads sp there,
           restores the control field of the CPSR from the saved value and
           issues an ISB, all in one asm statement.

           The saved CPSR lives in output operand %0 and the restore reads
           that same operand. No input operand is bound to the cpsr variable,
           because it holds no value before the MRS executes.
  \return  USR/SYS Stack Pointer value
 */
__IAR_FT uint32_t __get_SP_usr(void)
{
  uint32_t cpsr;
  uint32_t result;
  __ASM volatile(
    "MRS %0, cpsr \n"
    "CPS #0x1F \n" // no effect in USR mode
    "MOV %1, sp \n"
    "MSR cpsr_c, %0 \n" // no effect in USR mode
    "ISB" : "=r"(cpsr), "=r"(result) : : "memory"
   );
  return result;
}
|
|
||||||
|
|
||||||
__IAR_FT void __set_SP_usr(uint32_t topOfProcStack)
|
|
||||||
{
|
|
||||||
uint32_t cpsr;
|
|
||||||
__ASM volatile(
|
|
||||||
"MRS %0, cpsr \n"
|
|
||||||
"CPS #0x1F \n" // no effect in USR mode
|
|
||||||
"MOV sp, %1 \n"
|
|
||||||
"MSR cpsr_c, %2 \n" // no effect in USR mode
|
|
||||||
"ISB" : "=r"(cpsr) : "r" (topOfProcStack), "r"(cpsr) : "memory"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define __get_mode() (__get_CPSR() & 0x1FU)
|
|
||||||
|
|
||||||
__STATIC_INLINE
|
|
||||||
void __FPU_Enable(void)
|
|
||||||
{
|
|
||||||
__ASM volatile(
|
|
||||||
//Permit access to VFP/NEON, registers by modifying CPACR
|
|
||||||
" MRC p15,0,R1,c1,c0,2 \n"
|
|
||||||
" ORR R1,R1,#0x00F00000 \n"
|
|
||||||
" MCR p15,0,R1,c1,c0,2 \n"
|
|
||||||
|
|
||||||
//Ensure that subsequent instructions occur in the context of VFP/NEON access permitted
|
|
||||||
" ISB \n"
|
|
||||||
|
|
||||||
//Enable VFP/NEON
|
|
||||||
" VMRS R1,FPEXC \n"
|
|
||||||
" ORR R1,R1,#0x40000000 \n"
|
|
||||||
" VMSR FPEXC,R1 \n"
|
|
||||||
|
|
||||||
//Initialise VFP/NEON registers to 0
|
|
||||||
" MOV R2,#0 \n"
|
|
||||||
|
|
||||||
//Initialise D16 registers to 0
|
|
||||||
" VMOV D0, R2,R2 \n"
|
|
||||||
" VMOV D1, R2,R2 \n"
|
|
||||||
" VMOV D2, R2,R2 \n"
|
|
||||||
" VMOV D3, R2,R2 \n"
|
|
||||||
" VMOV D4, R2,R2 \n"
|
|
||||||
" VMOV D5, R2,R2 \n"
|
|
||||||
" VMOV D6, R2,R2 \n"
|
|
||||||
" VMOV D7, R2,R2 \n"
|
|
||||||
" VMOV D8, R2,R2 \n"
|
|
||||||
" VMOV D9, R2,R2 \n"
|
|
||||||
" VMOV D10,R2,R2 \n"
|
|
||||||
" VMOV D11,R2,R2 \n"
|
|
||||||
" VMOV D12,R2,R2 \n"
|
|
||||||
" VMOV D13,R2,R2 \n"
|
|
||||||
" VMOV D14,R2,R2 \n"
|
|
||||||
" VMOV D15,R2,R2 \n"
|
|
||||||
|
|
||||||
#ifdef __ARM_ADVANCED_SIMD__
|
|
||||||
//Initialise D32 registers to 0
|
|
||||||
" VMOV D16,R2,R2 \n"
|
|
||||||
" VMOV D17,R2,R2 \n"
|
|
||||||
" VMOV D18,R2,R2 \n"
|
|
||||||
" VMOV D19,R2,R2 \n"
|
|
||||||
" VMOV D20,R2,R2 \n"
|
|
||||||
" VMOV D21,R2,R2 \n"
|
|
||||||
" VMOV D22,R2,R2 \n"
|
|
||||||
" VMOV D23,R2,R2 \n"
|
|
||||||
" VMOV D24,R2,R2 \n"
|
|
||||||
" VMOV D25,R2,R2 \n"
|
|
||||||
" VMOV D26,R2,R2 \n"
|
|
||||||
" VMOV D27,R2,R2 \n"
|
|
||||||
" VMOV D28,R2,R2 \n"
|
|
||||||
" VMOV D29,R2,R2 \n"
|
|
||||||
" VMOV D30,R2,R2 \n"
|
|
||||||
" VMOV D31,R2,R2 \n"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
//Initialise FPSCR to a known state
|
|
||||||
" VMRS R1,FPSCR \n"
|
|
||||||
" MOV32 R2,#0x00086060 \n" //Mask off all bits that do not have to be preserved. Non-preserved bits can/should be zero.
|
|
||||||
" AND R1,R1,R2 \n"
|
|
||||||
" VMSR FPSCR,R1 \n"
|
|
||||||
: : : "cc", "r1", "r2"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#undef __IAR_FT
|
|
||||||
#undef __ICCARM_V8
|
|
||||||
|
|
||||||
#pragma diag_default=Pe940
|
|
||||||
#pragma diag_default=Pe177
|
|
||||||
|
|
||||||
#endif /* __CMSIS_ICCARM_H__ */
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,192 +0,0 @@
|
|||||||
/**************************************************************************//**
|
|
||||||
* @file irq_ctrl.h
|
|
||||||
* @brief Interrupt Controller API header file
|
|
||||||
* @version V1.1.0
|
|
||||||
* @date 03. March 2020
|
|
||||||
******************************************************************************/
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2017-2020 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#if defined ( __ICCARM__ )
|
|
||||||
#pragma system_include /* treat file as system include file for MISRA check */
|
|
||||||
#elif defined (__clang__)
|
|
||||||
#pragma clang system_header /* treat file as system include file */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef IRQ_CTRL_H_
|
|
||||||
#define IRQ_CTRL_H_
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
#ifndef IRQHANDLER_T
|
|
||||||
#define IRQHANDLER_T
|
|
||||||
/// Interrupt handler data type
|
|
||||||
typedef void (*IRQHandler_t) (void);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifndef IRQN_ID_T
|
|
||||||
#define IRQN_ID_T
|
|
||||||
/// Interrupt ID number data type
|
|
||||||
typedef int32_t IRQn_ID_t;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Interrupt mode bit-masks */
|
|
||||||
#define IRQ_MODE_TRIG_Pos (0U)
|
|
||||||
#define IRQ_MODE_TRIG_Msk (0x07UL /*<< IRQ_MODE_TRIG_Pos*/)
|
|
||||||
#define IRQ_MODE_TRIG_LEVEL (0x00UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: level triggered interrupt
|
|
||||||
#define IRQ_MODE_TRIG_LEVEL_LOW (0x01UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: low level triggered interrupt
|
|
||||||
#define IRQ_MODE_TRIG_LEVEL_HIGH (0x02UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: high level triggered interrupt
|
|
||||||
#define IRQ_MODE_TRIG_EDGE (0x04UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: edge triggered interrupt
|
|
||||||
#define IRQ_MODE_TRIG_EDGE_RISING (0x05UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: rising edge triggered interrupt
|
|
||||||
#define IRQ_MODE_TRIG_EDGE_FALLING (0x06UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: falling edge triggered interrupt
|
|
||||||
#define IRQ_MODE_TRIG_EDGE_BOTH (0x07UL /*<< IRQ_MODE_TRIG_Pos*/) ///< Trigger: rising and falling edge triggered interrupt
|
|
||||||
|
|
||||||
#define IRQ_MODE_TYPE_Pos (3U)
|
|
||||||
#define IRQ_MODE_TYPE_Msk (0x01UL << IRQ_MODE_TYPE_Pos)
|
|
||||||
#define IRQ_MODE_TYPE_IRQ (0x00UL << IRQ_MODE_TYPE_Pos) ///< Type: interrupt source triggers CPU IRQ line
|
|
||||||
#define IRQ_MODE_TYPE_FIQ (0x01UL << IRQ_MODE_TYPE_Pos) ///< Type: interrupt source triggers CPU FIQ line
|
|
||||||
|
|
||||||
#define IRQ_MODE_DOMAIN_Pos (4U)
|
|
||||||
#define IRQ_MODE_DOMAIN_Msk (0x01UL << IRQ_MODE_DOMAIN_Pos)
|
|
||||||
#define IRQ_MODE_DOMAIN_NONSECURE (0x00UL << IRQ_MODE_DOMAIN_Pos) ///< Domain: interrupt is targeting non-secure domain
|
|
||||||
#define IRQ_MODE_DOMAIN_SECURE (0x01UL << IRQ_MODE_DOMAIN_Pos) ///< Domain: interrupt is targeting secure domain
|
|
||||||
|
|
||||||
#define IRQ_MODE_CPU_Pos (5U)
|
|
||||||
#define IRQ_MODE_CPU_Msk (0xFFUL << IRQ_MODE_CPU_Pos)
|
|
||||||
#define IRQ_MODE_CPU_ALL (0x00UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets all CPUs
|
|
||||||
#define IRQ_MODE_CPU_0 (0x01UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 0
|
|
||||||
#define IRQ_MODE_CPU_1 (0x02UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 1
|
|
||||||
#define IRQ_MODE_CPU_2 (0x04UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 2
|
|
||||||
#define IRQ_MODE_CPU_3 (0x08UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 3
|
|
||||||
#define IRQ_MODE_CPU_4 (0x10UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 4
|
|
||||||
#define IRQ_MODE_CPU_5 (0x20UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 5
|
|
||||||
#define IRQ_MODE_CPU_6 (0x40UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 6
|
|
||||||
#define IRQ_MODE_CPU_7 (0x80UL << IRQ_MODE_CPU_Pos) ///< CPU: interrupt targets CPU 7
|
|
||||||
|
|
||||||
// Encoding in some early GIC implementations
|
|
||||||
#define IRQ_MODE_MODEL_Pos (13U)
|
|
||||||
#define IRQ_MODE_MODEL_Msk (0x1UL << IRQ_MODE_MODEL_Pos)
|
|
||||||
#define IRQ_MODE_MODEL_NN (0x0UL << IRQ_MODE_MODEL_Pos) ///< Corresponding interrupt is handled using the N-N model
|
|
||||||
#define IRQ_MODE_MODEL_1N (0x1UL << IRQ_MODE_MODEL_Pos) ///< Corresponding interrupt is handled using the 1-N model
|
|
||||||
|
|
||||||
#define IRQ_MODE_ERROR (0x80000000UL) ///< Bit indicating mode value error
|
|
||||||
|
|
||||||
/* Interrupt priority bit-masks */
|
|
||||||
#define IRQ_PRIORITY_Msk (0x0000FFFFUL) ///< Interrupt priority value bit-mask
|
|
||||||
#define IRQ_PRIORITY_ERROR (0x80000000UL) ///< Bit indicating priority value error
|
|
||||||
|
|
||||||
/// Initialize interrupt controller.
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_Initialize (void);
|
|
||||||
|
|
||||||
/// Register interrupt handler.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \param[in] handler interrupt handler function address
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_SetHandler (IRQn_ID_t irqn, IRQHandler_t handler);
|
|
||||||
|
|
||||||
/// Get the registered interrupt handler.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return registered interrupt handler function address.
|
|
||||||
IRQHandler_t IRQ_GetHandler (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Enable interrupt.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_Enable (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Disable interrupt.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_Disable (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Get interrupt enable state.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return 0 - interrupt is disabled, 1 - interrupt is enabled.
|
|
||||||
uint32_t IRQ_GetEnableState (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Configure interrupt request mode.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \param[in] mode mode configuration
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_SetMode (IRQn_ID_t irqn, uint32_t mode);
|
|
||||||
|
|
||||||
/// Get interrupt mode configuration.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return current interrupt mode configuration with optional IRQ_MODE_ERROR bit set.
|
|
||||||
uint32_t IRQ_GetMode (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Get ID number of current interrupt request (IRQ).
|
|
||||||
/// \return interrupt ID number.
|
|
||||||
IRQn_ID_t IRQ_GetActiveIRQ (void);
|
|
||||||
|
|
||||||
/// Get ID number of current fast interrupt request (FIQ).
|
|
||||||
/// \return interrupt ID number.
|
|
||||||
IRQn_ID_t IRQ_GetActiveFIQ (void);
|
|
||||||
|
|
||||||
/// Signal end of interrupt processing.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_EndOfInterrupt (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Set interrupt pending flag.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_SetPending (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Get interrupt pending flag.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return 0 - interrupt is not pending, 1 - interrupt is pending.
|
|
||||||
uint32_t IRQ_GetPending (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Clear interrupt pending flag.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_ClearPending (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Set interrupt priority value.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \param[in] priority interrupt priority value
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_SetPriority (IRQn_ID_t irqn, uint32_t priority);
|
|
||||||
|
|
||||||
/// Get interrupt priority.
|
|
||||||
/// \param[in] irqn interrupt ID number
|
|
||||||
/// \return current interrupt priority value with optional IRQ_PRIORITY_ERROR bit set.
|
|
||||||
uint32_t IRQ_GetPriority (IRQn_ID_t irqn);
|
|
||||||
|
|
||||||
/// Set priority masking threshold.
|
|
||||||
/// \param[in] priority priority masking threshold value
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_SetPriorityMask (uint32_t priority);
|
|
||||||
|
|
||||||
/// Get priority masking threshold
|
|
||||||
/// \return current priority masking threshold value with optional IRQ_PRIORITY_ERROR bit set.
|
|
||||||
uint32_t IRQ_GetPriorityMask (void);
|
|
||||||
|
|
||||||
/// Set priority grouping field split point
|
|
||||||
/// \param[in] bits number of MSB bits included in the group priority field comparison
|
|
||||||
/// \return 0 on success, -1 on error.
|
|
||||||
int32_t IRQ_SetPriorityGroupBits (uint32_t bits);
|
|
||||||
|
|
||||||
/// Get priority grouping field split point
|
|
||||||
/// \return current number of MSB bits included in the group priority field comparison with
|
|
||||||
/// optional IRQ_PRIORITY_ERROR bit set.
|
|
||||||
uint32_t IRQ_GetPriorityGroupBits (void);
|
|
||||||
|
|
||||||
#endif // IRQ_CTRL_H_
|
|
||||||
@ -1,418 +0,0 @@
|
|||||||
/**************************************************************************//**
|
|
||||||
* @file irq_ctrl_gic.c
|
|
||||||
* @brief Interrupt controller handling implementation for GIC
|
|
||||||
* @version V1.1.1
|
|
||||||
* @date 29. March 2021
|
|
||||||
******************************************************************************/
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2017-2021 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
|
|
||||||
#include "RTE_Components.h"
|
|
||||||
#include CMSIS_device_header
|
|
||||||
|
|
||||||
#include "irq_ctrl.h"
|
|
||||||
|
|
||||||
#if defined(__GIC_PRESENT) && (__GIC_PRESENT == 1U)
|
|
||||||
|
|
||||||
/// Number of implemented interrupt lines
|
|
||||||
#ifndef IRQ_GIC_LINE_COUNT
|
|
||||||
#define IRQ_GIC_LINE_COUNT (1020U)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/// Handler table indexed by interrupt ID: cleared by IRQ_Initialize,
/// written by IRQ_SetHandler and read by IRQ_GetHandler.
static IRQHandler_t IRQTable[IRQ_GIC_LINE_COUNT] = { 0U };
|
|
||||||
/// Flag of the GIC-390 errata 733075 workaround: set to 1 by IRQ_GetActiveIRQ when
/// interrupt ID 0 is acknowledged, reported active and was not seen before;
/// reset to 0 by IRQ_EndOfInterrupt when ID 0 is completed.
static uint32_t IRQ_ID0;
|
|
||||||
|
|
||||||
/// Initialize interrupt controller.
|
|
||||||
__WEAK int32_t IRQ_Initialize (void) {
|
|
||||||
uint32_t i;
|
|
||||||
|
|
||||||
for (i = 0U; i < IRQ_GIC_LINE_COUNT; i++) {
|
|
||||||
IRQTable[i] = (IRQHandler_t)NULL;
|
|
||||||
}
|
|
||||||
GIC_Enable();
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Register interrupt handler.
|
|
||||||
__WEAK int32_t IRQ_SetHandler (IRQn_ID_t irqn, IRQHandler_t handler) {
|
|
||||||
int32_t status;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
IRQTable[irqn] = handler;
|
|
||||||
status = 0;
|
|
||||||
} else {
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get the registered interrupt handler.
|
|
||||||
__WEAK IRQHandler_t IRQ_GetHandler (IRQn_ID_t irqn) {
|
|
||||||
IRQHandler_t h;
|
|
||||||
|
|
||||||
// Ignore CPUID field (software generated interrupts)
|
|
||||||
irqn &= 0x3FFU;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
h = IRQTable[irqn];
|
|
||||||
} else {
|
|
||||||
h = (IRQHandler_t)0;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (h);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Enable interrupt.
|
|
||||||
__WEAK int32_t IRQ_Enable (IRQn_ID_t irqn) {
|
|
||||||
int32_t status;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
GIC_EnableIRQ ((IRQn_Type)irqn);
|
|
||||||
status = 0;
|
|
||||||
} else {
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Disable interrupt.
|
|
||||||
__WEAK int32_t IRQ_Disable (IRQn_ID_t irqn) {
|
|
||||||
int32_t status;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
GIC_DisableIRQ ((IRQn_Type)irqn);
|
|
||||||
status = 0;
|
|
||||||
} else {
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get interrupt enable state.
|
|
||||||
__WEAK uint32_t IRQ_GetEnableState (IRQn_ID_t irqn) {
|
|
||||||
uint32_t enable;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
enable = GIC_GetEnableIRQ((IRQn_Type)irqn);
|
|
||||||
} else {
|
|
||||||
enable = 0U;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (enable);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Configure interrupt request mode.
|
|
||||||
__WEAK int32_t IRQ_SetMode (IRQn_ID_t irqn, uint32_t mode) {
|
|
||||||
uint32_t val;
|
|
||||||
uint8_t cfg;
|
|
||||||
uint8_t secure;
|
|
||||||
uint8_t cpu;
|
|
||||||
int32_t status = 0;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
// Check triggering mode
|
|
||||||
val = (mode & IRQ_MODE_TRIG_Msk);
|
|
||||||
|
|
||||||
if (val == IRQ_MODE_TRIG_LEVEL) {
|
|
||||||
cfg = 0x00U;
|
|
||||||
} else if (val == IRQ_MODE_TRIG_EDGE) {
|
|
||||||
cfg = 0x02U;
|
|
||||||
} else {
|
|
||||||
cfg = 0x00U;
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
val = (mode & IRQ_MODE_MODEL_Msk);
|
|
||||||
if (val == IRQ_MODE_MODEL_1N) {
|
|
||||||
cfg |= 1; // 1-N model
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check interrupt type
|
|
||||||
val = mode & IRQ_MODE_TYPE_Msk;
|
|
||||||
|
|
||||||
if (val != IRQ_MODE_TYPE_IRQ) {
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check interrupt domain
|
|
||||||
val = mode & IRQ_MODE_DOMAIN_Msk;
|
|
||||||
|
|
||||||
if (val == IRQ_MODE_DOMAIN_NONSECURE) {
|
|
||||||
secure = 0U;
|
|
||||||
} else {
|
|
||||||
// Check security extensions support
|
|
||||||
val = GIC_DistributorInfo() & (1UL << 10U);
|
|
||||||
|
|
||||||
if (val != 0U) {
|
|
||||||
// Security extensions are supported
|
|
||||||
secure = 1U;
|
|
||||||
} else {
|
|
||||||
secure = 0U;
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check interrupt CPU targets
|
|
||||||
val = mode & IRQ_MODE_CPU_Msk;
|
|
||||||
|
|
||||||
if (val == IRQ_MODE_CPU_ALL) {
|
|
||||||
cpu = 0xFFU;
|
|
||||||
} else {
|
|
||||||
cpu = (uint8_t)(val >> IRQ_MODE_CPU_Pos);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Apply configuration if no mode error
|
|
||||||
if (status == 0) {
|
|
||||||
GIC_SetConfiguration((IRQn_Type)irqn, cfg);
|
|
||||||
GIC_SetTarget ((IRQn_Type)irqn, cpu);
|
|
||||||
|
|
||||||
if (secure != 0U) {
|
|
||||||
GIC_SetGroup ((IRQn_Type)irqn, secure);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get interrupt mode configuration.
|
|
||||||
__WEAK uint32_t IRQ_GetMode (IRQn_ID_t irqn) {
|
|
||||||
uint32_t mode;
|
|
||||||
uint32_t val;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
mode = IRQ_MODE_TYPE_IRQ;
|
|
||||||
|
|
||||||
// Get trigger mode
|
|
||||||
val = GIC_GetConfiguration((IRQn_Type)irqn);
|
|
||||||
|
|
||||||
if ((val & 2U) != 0U) {
|
|
||||||
// Corresponding interrupt is edge triggered
|
|
||||||
mode |= IRQ_MODE_TRIG_EDGE;
|
|
||||||
} else {
|
|
||||||
// Corresponding interrupt is level triggered
|
|
||||||
mode |= IRQ_MODE_TRIG_LEVEL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (val & 1U) {
|
|
||||||
mode |= IRQ_MODE_MODEL_1N;
|
|
||||||
}
|
|
||||||
// Get interrupt CPU targets
|
|
||||||
mode |= GIC_GetTarget ((IRQn_Type)irqn) << IRQ_MODE_CPU_Pos;
|
|
||||||
|
|
||||||
} else {
|
|
||||||
mode = IRQ_MODE_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (mode);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get ID number of current interrupt request (IRQ).
|
|
||||||
__WEAK IRQn_ID_t IRQ_GetActiveIRQ (void) {
|
|
||||||
IRQn_ID_t irqn;
|
|
||||||
uint32_t prio;
|
|
||||||
|
|
||||||
/* Dummy read to avoid GIC 390 errata 801120 */
|
|
||||||
GIC_GetHighPendingIRQ();
|
|
||||||
|
|
||||||
irqn = GIC_AcknowledgePending();
|
|
||||||
|
|
||||||
__DSB();
|
|
||||||
|
|
||||||
/* Workaround GIC 390 errata 733075 (GIC-390_Errata_Notice_v6.pdf, 09-Jul-2014) */
|
|
||||||
/* The following workaround code is for a single-core system. It would be */
|
|
||||||
/* different in a multi-core system. */
|
|
||||||
/* If the ID is 0 or 0x3FE or 0x3FF, then the GIC CPU interface may be locked-up */
|
|
||||||
/* so unlock it, otherwise service the interrupt as normal. */
|
|
||||||
/* Special IDs 1020=0x3FC and 1021=0x3FD are reserved values in GICv1 and GICv2 */
|
|
||||||
/* so will not occur here. */
|
|
||||||
|
|
||||||
if ((irqn == 0) || (irqn >= 0x3FE)) {
|
|
||||||
/* Unlock the CPU interface with a dummy write to Interrupt Priority Register */
|
|
||||||
prio = GIC_GetPriority((IRQn_Type)0);
|
|
||||||
GIC_SetPriority ((IRQn_Type)0, prio);
|
|
||||||
|
|
||||||
__DSB();
|
|
||||||
|
|
||||||
if ((irqn == 0U) && ((GIC_GetIRQStatus ((IRQn_Type)irqn) & 1U) != 0U) && (IRQ_ID0 == 0U)) {
|
|
||||||
/* If the ID is 0, is active and has not been seen before */
|
|
||||||
IRQ_ID0 = 1U;
|
|
||||||
}
|
|
||||||
/* End of Workaround GIC 390 errata 733075 */
|
|
||||||
}
|
|
||||||
|
|
||||||
return (irqn);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get ID number of current fast interrupt request (FIQ).
|
|
||||||
__WEAK IRQn_ID_t IRQ_GetActiveFIQ (void) {
|
|
||||||
return ((IRQn_ID_t)-1);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Signal end of interrupt processing.
|
|
||||||
__WEAK int32_t IRQ_EndOfInterrupt (IRQn_ID_t irqn) {
|
|
||||||
int32_t status;
|
|
||||||
IRQn_Type irq = (IRQn_Type)irqn;
|
|
||||||
|
|
||||||
irqn &= 0x3FFU;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
GIC_EndInterrupt (irq);
|
|
||||||
|
|
||||||
if (irqn == 0) {
|
|
||||||
IRQ_ID0 = 0U;
|
|
||||||
}
|
|
||||||
|
|
||||||
status = 0;
|
|
||||||
} else {
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Set interrupt pending flag.
|
|
||||||
__WEAK int32_t IRQ_SetPending (IRQn_ID_t irqn) {
|
|
||||||
int32_t status;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
GIC_SetPendingIRQ ((IRQn_Type)irqn);
|
|
||||||
status = 0;
|
|
||||||
} else {
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Get interrupt pending flag.
|
|
||||||
__WEAK uint32_t IRQ_GetPending (IRQn_ID_t irqn) {
|
|
||||||
uint32_t pending;
|
|
||||||
|
|
||||||
if ((irqn >= 16) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
pending = GIC_GetPendingIRQ ((IRQn_Type)irqn);
|
|
||||||
} else {
|
|
||||||
pending = 0U;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (pending & 1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Clear interrupt pending flag.
|
|
||||||
__WEAK int32_t IRQ_ClearPending (IRQn_ID_t irqn) {
|
|
||||||
int32_t status;
|
|
||||||
|
|
||||||
if ((irqn >= 16) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
GIC_ClearPendingIRQ ((IRQn_Type)irqn);
|
|
||||||
status = 0;
|
|
||||||
} else {
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Set interrupt priority value.
|
|
||||||
__WEAK int32_t IRQ_SetPriority (IRQn_ID_t irqn, uint32_t priority) {
|
|
||||||
int32_t status;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
GIC_SetPriority ((IRQn_Type)irqn, priority);
|
|
||||||
status = 0;
|
|
||||||
} else {
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get interrupt priority.
|
|
||||||
__WEAK uint32_t IRQ_GetPriority (IRQn_ID_t irqn) {
|
|
||||||
uint32_t priority;
|
|
||||||
|
|
||||||
if ((irqn >= 0) && (irqn < (IRQn_ID_t)IRQ_GIC_LINE_COUNT)) {
|
|
||||||
priority = GIC_GetPriority ((IRQn_Type)irqn);
|
|
||||||
} else {
|
|
||||||
priority = IRQ_PRIORITY_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (priority);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Set priority masking threshold.
|
|
||||||
__WEAK int32_t IRQ_SetPriorityMask (uint32_t priority) {
|
|
||||||
GIC_SetInterfacePriorityMask (priority);
|
|
||||||
return (0);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get priority masking threshold
|
|
||||||
__WEAK uint32_t IRQ_GetPriorityMask (void) {
|
|
||||||
return GIC_GetInterfacePriorityMask();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Set priority grouping field split point
|
|
||||||
__WEAK int32_t IRQ_SetPriorityGroupBits (uint32_t bits) {
|
|
||||||
int32_t status;
|
|
||||||
|
|
||||||
if (bits == IRQ_PRIORITY_Msk) {
|
|
||||||
bits = 7U;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bits < 8U) {
|
|
||||||
GIC_SetBinaryPoint (7U - bits);
|
|
||||||
status = 0;
|
|
||||||
} else {
|
|
||||||
status = -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/// Get priority grouping field split point
|
|
||||||
__WEAK uint32_t IRQ_GetPriorityGroupBits (void) {
|
|
||||||
uint32_t bp;
|
|
||||||
|
|
||||||
bp = GIC_GetBinaryPoint() & 0x07U;
|
|
||||||
|
|
||||||
return (7U - bp);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif
|
|
||||||
@ -1,561 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2013-2021 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
* ----------------------------------------------------------------------
|
|
||||||
*
|
|
||||||
* $Date: 16. June 2021
|
|
||||||
* $Revision: V2.1.0
|
|
||||||
*
|
|
||||||
* Project: CMSIS-DAP Configuration
|
|
||||||
* Title: DAP_config.h CMSIS-DAP Configuration File (Template)
|
|
||||||
*
|
|
||||||
*---------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
#ifndef __DAP_CONFIG_H__
|
|
||||||
#define __DAP_CONFIG_H__
|
|
||||||
|
|
||||||
|
|
||||||
//**************************************************************************************************
|
|
||||||
/**
|
|
||||||
\defgroup DAP_Config_Debug_gr CMSIS-DAP Debug Unit Information
|
|
||||||
\ingroup DAP_ConfigIO_gr
|
|
||||||
@{
|
|
||||||
Provides definitions about the hardware and configuration of the Debug Unit.
|
|
||||||
|
|
||||||
This information includes:
|
|
||||||
- Definition of Cortex-M processor parameters used in CMSIS-DAP Debug Unit.
|
|
||||||
- Debug Unit Identification strings (Vendor, Product, Serial Number).
|
|
||||||
- Debug Unit communication packet size.
|
|
||||||
- Debug Access Port supported modes and settings (JTAG/SWD and SWO).
|
|
||||||
- Optional information about a connected Target Device (for Evaluation Boards).
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifdef _RTE_
|
|
||||||
#include "RTE_Components.h"
|
|
||||||
#include CMSIS_device_header
|
|
||||||
#else
|
|
||||||
#include "device.h" // Debug Unit Cortex-M Processor Header File
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/// Processor Clock of the Cortex-M MCU used in the Debug Unit.
|
|
||||||
/// This value is used to calculate the SWD/JTAG clock speed.
|
|
||||||
#define CPU_CLOCK 100000000U ///< Specifies the CPU Clock in Hz.
|
|
||||||
|
|
||||||
/// Number of processor cycles for I/O Port write operations.
|
|
||||||
/// This value is used to calculate the SWD/JTAG clock speed that is generated with I/O
|
|
||||||
/// Port write operations in the Debug Unit by a Cortex-M MCU. Most Cortex-M processors
|
|
||||||
/// require 2 processor cycles for a I/O Port Write operation. If the Debug Unit uses
|
|
||||||
/// a Cortex-M0+ processor with high-speed peripheral I/O only 1 processor cycle might be
|
|
||||||
/// required.
|
|
||||||
#define IO_PORT_WRITE_CYCLES 2U ///< I/O Cycles: 2=default, 1=Cortex-M0+ fast I/O.
|
|
||||||
|
|
||||||
/// Indicate that Serial Wire Debug (SWD) communication mode is available at the Debug Access Port.
|
|
||||||
/// This information is returned by the command \ref DAP_Info as part of <b>Capabilities</b>.
|
|
||||||
#define DAP_SWD 1 ///< SWD Mode: 1 = available, 0 = not available.
|
|
||||||
|
|
||||||
/// Indicate that JTAG communication mode is available at the Debug Port.
|
|
||||||
/// This information is returned by the command \ref DAP_Info as part of <b>Capabilities</b>.
|
|
||||||
#define DAP_JTAG 1 ///< JTAG Mode: 1 = available, 0 = not available.
|
|
||||||
|
|
||||||
/// Configure maximum number of JTAG devices on the scan chain connected to the Debug Access Port.
|
|
||||||
/// This setting impacts the RAM requirements of the Debug Unit. Valid range is 1 .. 255.
|
|
||||||
#define DAP_JTAG_DEV_CNT 8U ///< Maximum number of JTAG devices on scan chain.
|
|
||||||
|
|
||||||
/// Default communication mode on the Debug Access Port.
|
|
||||||
/// Used for the command \ref DAP_Connect when Port Default mode is selected.
|
|
||||||
#define DAP_DEFAULT_PORT 1U ///< Default JTAG/SWJ Port Mode: 1 = SWD, 2 = JTAG.
|
|
||||||
|
|
||||||
/// Default communication speed on the Debug Access Port for SWD and JTAG mode.
|
|
||||||
/// Used to initialize the default SWD/JTAG clock frequency.
|
|
||||||
/// The command \ref DAP_SWJ_Clock can be used to overwrite this default setting.
|
|
||||||
#define DAP_DEFAULT_SWJ_CLOCK 1000000U ///< Default SWD/JTAG clock frequency in Hz.
|
|
||||||
|
|
||||||
/// Maximum Packet Size for Command and Response data.
|
|
||||||
/// This configuration setting is used to optimize the communication performance with the
|
|
||||||
/// debugger and depends on the USB peripheral. Typical values are 64 for Full-speed USB HID or WinUSB,
|
|
||||||
/// 1024 for High-speed USB HID and 512 for High-speed USB WinUSB.
|
|
||||||
#define DAP_PACKET_SIZE 512U ///< Specifies Packet Size in bytes.
|
|
||||||
|
|
||||||
/// Maximum number of Packet Buffers for Command and Response data.
|
|
||||||
/// This configuration setting is used to optimize the communication performance with the
|
|
||||||
/// debugger and depends on the USB peripheral. For devices with limited RAM or USB buffer the
|
|
||||||
/// setting can be reduced (valid range is 1 .. 255).
|
|
||||||
#define DAP_PACKET_COUNT 8U ///< Specifies number of packets buffered.
|
|
||||||
|
|
||||||
/// Indicate that UART Serial Wire Output (SWO) trace is available.
|
|
||||||
/// This information is returned by the command \ref DAP_Info as part of <b>Capabilities</b>.
|
|
||||||
#define SWO_UART 1 ///< SWO UART: 1 = available, 0 = not available.
|
|
||||||
|
|
||||||
/// USART Driver instance number for the UART SWO.
|
|
||||||
#define SWO_UART_DRIVER 0 ///< USART Driver instance number (Driver_USART#).
|
|
||||||
|
|
||||||
/// Maximum SWO UART Baudrate.
|
|
||||||
#define SWO_UART_MAX_BAUDRATE 10000000U ///< SWO UART Maximum Baudrate in Hz.
|
|
||||||
|
|
||||||
/// Indicate that Manchester Serial Wire Output (SWO) trace is available.
|
|
||||||
/// This information is returned by the command \ref DAP_Info as part of <b>Capabilities</b>.
|
|
||||||
#define SWO_MANCHESTER 0 ///< SWO Manchester: 1 = available, 0 = not available.
|
|
||||||
|
|
||||||
/// SWO Trace Buffer Size.
|
|
||||||
#define SWO_BUFFER_SIZE 4096U ///< SWO Trace Buffer Size in bytes (must be 2^n).
|
|
||||||
|
|
||||||
/// SWO Streaming Trace.
|
|
||||||
#define SWO_STREAM 0 ///< SWO Streaming Trace: 1 = available, 0 = not available.
|
|
||||||
|
|
||||||
/// Clock frequency of the Test Domain Timer. Timer value is returned with \ref TIMESTAMP_GET.
|
|
||||||
#define TIMESTAMP_CLOCK 100000000U ///< Timestamp clock in Hz (0 = timestamps not supported).
|
|
||||||
|
|
||||||
/// Indicate that UART Communication Port is available.
|
|
||||||
/// This information is returned by the command \ref DAP_Info as part of <b>Capabilities</b>.
|
|
||||||
#define DAP_UART 1 ///< DAP UART: 1 = available, 0 = not available.
|
|
||||||
|
|
||||||
/// USART Driver instance number for the UART Communication Port.
|
|
||||||
#define DAP_UART_DRIVER 1 ///< USART Driver instance number (Driver_USART#).
|
|
||||||
|
|
||||||
/// UART Receive Buffer Size.
|
|
||||||
#define DAP_UART_RX_BUFFER_SIZE 1024U ///< Uart Receive Buffer Size in bytes (must be 2^n).
|
|
||||||
|
|
||||||
/// UART Transmit Buffer Size.
|
|
||||||
#define DAP_UART_TX_BUFFER_SIZE 1024U ///< Uart Transmit Buffer Size in bytes (must be 2^n).
|
|
||||||
|
|
||||||
/// Indicate that UART Communication via USB COM Port is available.
|
|
||||||
/// This information is returned by the command \ref DAP_Info as part of <b>Capabilities</b>.
|
|
||||||
#define DAP_UART_USB_COM_PORT 1 ///< USB COM Port: 1 = available, 0 = not available.
|
|
||||||
|
|
||||||
/// Debug Unit is connected to fixed Target Device.
|
|
||||||
/// The Debug Unit may be part of an evaluation board and always connected to a fixed
|
|
||||||
/// known device. In this case a Device Vendor, Device Name, Board Vendor and Board Name strings
|
|
||||||
/// are stored and may be used by the debugger or IDE to configure device parameters.
|
|
||||||
#define TARGET_FIXED 0 ///< Target: 1 = known, 0 = unknown;
|
|
||||||
|
|
||||||
#define TARGET_DEVICE_VENDOR "Arm" ///< String indicating the Silicon Vendor
|
|
||||||
#define TARGET_DEVICE_NAME "Cortex-M" ///< String indicating the Target Device
|
|
||||||
#define TARGET_BOARD_VENDOR "Arm" ///< String indicating the Board Vendor
|
|
||||||
#define TARGET_BOARD_NAME "Arm board" ///< String indicating the Board Name
|
|
||||||
|
|
||||||
#if TARGET_FIXED != 0
|
|
||||||
#include <string.h>
|
|
||||||
static const char TargetDeviceVendor [] = TARGET_DEVICE_VENDOR;
|
|
||||||
static const char TargetDeviceName [] = TARGET_DEVICE_NAME;
|
|
||||||
static const char TargetBoardVendor [] = TARGET_BOARD_VENDOR;
|
|
||||||
static const char TargetBoardName [] = TARGET_BOARD_NAME;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/** Get Vendor Name string.
|
|
||||||
\param str Pointer to buffer to store the string (max 60 characters).
|
|
||||||
\return String length (including terminating NULL character) or 0 (no string).
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint8_t DAP_GetVendorString (char *str) {
|
|
||||||
(void)str;
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get Product Name string.
|
|
||||||
\param str Pointer to buffer to store the string (max 60 characters).
|
|
||||||
\return String length (including terminating NULL character) or 0 (no string).
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint8_t DAP_GetProductString (char *str) {
|
|
||||||
(void)str;
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get Serial Number string.
|
|
||||||
\param str Pointer to buffer to store the string (max 60 characters).
|
|
||||||
\return String length (including terminating NULL character) or 0 (no string).
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint8_t DAP_GetSerNumString (char *str) {
|
|
||||||
(void)str;
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get Target Device Vendor string.
|
|
||||||
\param str Pointer to buffer to store the string (max 60 characters).
|
|
||||||
\return String length (including terminating NULL character) or 0 (no string).
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint8_t DAP_GetTargetDeviceVendorString (char *str) {
|
|
||||||
#if TARGET_FIXED != 0
|
|
||||||
uint8_t len;
|
|
||||||
|
|
||||||
strcpy(str, TargetDeviceVendor);
|
|
||||||
len = (uint8_t)(strlen(TargetDeviceVendor) + 1U);
|
|
||||||
return (len);
|
|
||||||
#else
|
|
||||||
(void)str;
|
|
||||||
return (0U);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get Target Device Name string.
|
|
||||||
\param str Pointer to buffer to store the string (max 60 characters).
|
|
||||||
\return String length (including terminating NULL character) or 0 (no string).
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint8_t DAP_GetTargetDeviceNameString (char *str) {
|
|
||||||
#if TARGET_FIXED != 0
|
|
||||||
uint8_t len;
|
|
||||||
|
|
||||||
strcpy(str, TargetDeviceName);
|
|
||||||
len = (uint8_t)(strlen(TargetDeviceName) + 1U);
|
|
||||||
return (len);
|
|
||||||
#else
|
|
||||||
(void)str;
|
|
||||||
return (0U);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get Target Board Vendor string.
|
|
||||||
\param str Pointer to buffer to store the string (max 60 characters).
|
|
||||||
\return String length (including terminating NULL character) or 0 (no string).
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint8_t DAP_GetTargetBoardVendorString (char *str) {
|
|
||||||
#if TARGET_FIXED != 0
|
|
||||||
uint8_t len;
|
|
||||||
|
|
||||||
strcpy(str, TargetBoardVendor);
|
|
||||||
len = (uint8_t)(strlen(TargetBoardVendor) + 1U);
|
|
||||||
return (len);
|
|
||||||
#else
|
|
||||||
(void)str;
|
|
||||||
return (0U);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get Target Board Name string.
|
|
||||||
\param str Pointer to buffer to store the string (max 60 characters).
|
|
||||||
\return String length (including terminating NULL character) or 0 (no string).
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint8_t DAP_GetTargetBoardNameString (char *str) {
|
|
||||||
#if TARGET_FIXED != 0
|
|
||||||
uint8_t len;
|
|
||||||
|
|
||||||
strcpy(str, TargetBoardName);
|
|
||||||
len = (uint8_t)(strlen(TargetBoardName) + 1U);
|
|
||||||
return (len);
|
|
||||||
#else
|
|
||||||
(void)str;
|
|
||||||
return (0U);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Get Product Firmware Version string.
|
|
||||||
\param str Pointer to buffer to store the string (max 60 characters).
|
|
||||||
\return String length (including terminating NULL character) or 0 (no string).
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint8_t DAP_GetProductFirmwareVersionString (char *str) {
|
|
||||||
(void)str;
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
///@}
|
|
||||||
|
|
||||||
|
|
||||||
//**************************************************************************************************
|
|
||||||
/**
|
|
||||||
\defgroup DAP_Config_PortIO_gr CMSIS-DAP Hardware I/O Pin Access
|
|
||||||
\ingroup DAP_ConfigIO_gr
|
|
||||||
@{
|
|
||||||
|
|
||||||
Standard I/O Pins of the CMSIS-DAP Hardware Debug Port support standard JTAG mode
|
|
||||||
and Serial Wire Debug (SWD) mode. In SWD mode only 2 pins are required to implement the debug
|
|
||||||
interface of a device. The following I/O Pins are provided:
|
|
||||||
|
|
||||||
JTAG I/O Pin | SWD I/O Pin | CMSIS-DAP Hardware pin mode
|
|
||||||
---------------------------- | -------------------- | ---------------------------------------------
|
|
||||||
TCK: Test Clock | SWCLK: Clock | Output Push/Pull
|
|
||||||
TMS: Test Mode Select | SWDIO: Data I/O | Output Push/Pull; Input (for receiving data)
|
|
||||||
TDI: Test Data Input | | Output Push/Pull
|
|
||||||
TDO: Test Data Output | | Input
|
|
||||||
nTRST: Test Reset (optional) | | Output Open Drain with pull-up resistor
|
|
||||||
nRESET: Device Reset | nRESET: Device Reset | Output Open Drain with pull-up resistor
|
|
||||||
|
|
||||||
|
|
||||||
DAP Hardware I/O Pin Access Functions
|
|
||||||
-------------------------------------
|
|
||||||
The various I/O Pins are accessed by functions that implement the Read, Write, Set, or Clear to
|
|
||||||
these I/O Pins.
|
|
||||||
|
|
||||||
For the SWDIO I/O Pin there are additional functions that are called in SWD I/O mode only.
|
|
||||||
These functions are provided to achieve faster I/O that is possible with some advanced GPIO
|
|
||||||
peripherals that can independently write/read a single I/O pin without affecting any other pins
|
|
||||||
of the same I/O port. The following SWDIO I/O Pin functions are provided:
|
|
||||||
- \ref PIN_SWDIO_OUT_ENABLE to enable the output mode from the DAP hardware.
|
|
||||||
- \ref PIN_SWDIO_OUT_DISABLE to enable the input mode to the DAP hardware.
|
|
||||||
- \ref PIN_SWDIO_IN to read from the SWDIO I/O pin with utmost possible speed.
|
|
||||||
- \ref PIN_SWDIO_OUT to write to the SWDIO I/O pin with utmost possible speed.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
// Configure DAP I/O pins ------------------------------
|
|
||||||
|
|
||||||
/** Setup JTAG I/O pins: TCK, TMS, TDI, TDO, nTRST, and nRESET.
|
|
||||||
Configures the DAP Hardware I/O pins for JTAG mode:
|
|
||||||
- TCK, TMS, TDI, nTRST, nRESET to output mode and set to high level.
|
|
||||||
- TDO to input mode.
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void PORT_JTAG_SETUP (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Setup SWD I/O pins: SWCLK, SWDIO, and nRESET.
|
|
||||||
Configures the DAP Hardware I/O pins for Serial Wire Debug (SWD) mode:
|
|
||||||
- SWCLK, SWDIO, nRESET to output mode and set to default high level.
|
|
||||||
- TDI, nTRST to HighZ mode (pins are unused in SWD mode).
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void PORT_SWD_SETUP (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Disable JTAG/SWD I/O Pins.
|
|
||||||
Disables the DAP Hardware I/O pins which configures:
|
|
||||||
- TCK/SWCLK, TMS/SWDIO, TDI, TDO, nTRST, nRESET to High-Z mode.
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void PORT_OFF (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// SWCLK/TCK I/O pin -------------------------------------
|
|
||||||
|
|
||||||
/** SWCLK/TCK I/O pin: Get Input.
|
|
||||||
\return Current status of the SWCLK/TCK DAP hardware I/O pin.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t PIN_SWCLK_TCK_IN (void) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** SWCLK/TCK I/O pin: Set Output to High.
|
|
||||||
Set the SWCLK/TCK DAP hardware I/O pin to high level.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_SWCLK_TCK_SET (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** SWCLK/TCK I/O pin: Set Output to Low.
|
|
||||||
Set the SWCLK/TCK DAP hardware I/O pin to low level.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_SWCLK_TCK_CLR (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// SWDIO/TMS Pin I/O --------------------------------------
|
|
||||||
|
|
||||||
/** SWDIO/TMS I/O pin: Get Input.
|
|
||||||
\return Current status of the SWDIO/TMS DAP hardware I/O pin.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t PIN_SWDIO_TMS_IN (void) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** SWDIO/TMS I/O pin: Set Output to High.
|
|
||||||
Set the SWDIO/TMS DAP hardware I/O pin to high level.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_SWDIO_TMS_SET (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** SWDIO/TMS I/O pin: Set Output to Low.
|
|
||||||
Set the SWDIO/TMS DAP hardware I/O pin to low level.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_SWDIO_TMS_CLR (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** SWDIO I/O pin: Get Input (used in SWD mode only).
|
|
||||||
\return Current status of the SWDIO DAP hardware I/O pin.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t PIN_SWDIO_IN (void) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** SWDIO I/O pin: Set Output (used in SWD mode only).
|
|
||||||
\param bit Output value for the SWDIO DAP hardware I/O pin.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_SWDIO_OUT (uint32_t bit) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** SWDIO I/O pin: Switch to Output mode (used in SWD mode only).
|
|
||||||
Configure the SWDIO DAP hardware I/O pin to output mode. This function is
|
|
||||||
called prior \ref PIN_SWDIO_OUT function calls.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_SWDIO_OUT_ENABLE (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** SWDIO I/O pin: Switch to Input mode (used in SWD mode only).
|
|
||||||
Configure the SWDIO DAP hardware I/O pin to input mode. This function is
|
|
||||||
called prior \ref PIN_SWDIO_IN function calls.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_SWDIO_OUT_DISABLE (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// TDI Pin I/O ---------------------------------------------
|
|
||||||
|
|
||||||
/** TDI I/O pin: Get Input.
|
|
||||||
\return Current status of the TDI DAP hardware I/O pin.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t PIN_TDI_IN (void) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** TDI I/O pin: Set Output.
|
|
||||||
\param bit Output value for the TDI DAP hardware I/O pin.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_TDI_OUT (uint32_t bit) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// TDO Pin I/O ---------------------------------------------
|
|
||||||
|
|
||||||
/** TDO I/O pin: Get Input.
|
|
||||||
\return Current status of the TDO DAP hardware I/O pin.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t PIN_TDO_IN (void) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// nTRST Pin I/O -------------------------------------------
|
|
||||||
|
|
||||||
/** nTRST I/O pin: Get Input.
|
|
||||||
\return Current status of the nTRST DAP hardware I/O pin.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t PIN_nTRST_IN (void) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** nTRST I/O pin: Set Output.
|
|
||||||
\param bit JTAG TRST Test Reset pin status:
|
|
||||||
- 0: issue a JTAG TRST Test Reset.
|
|
||||||
- 1: release JTAG TRST Test Reset.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_nTRST_OUT (uint32_t bit) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
// nRESET Pin I/O------------------------------------------
|
|
||||||
|
|
||||||
/** nRESET I/O pin: Get Input.
|
|
||||||
\return Current status of the nRESET DAP hardware I/O pin.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE uint32_t PIN_nRESET_IN (void) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** nRESET I/O pin: Set Output.
|
|
||||||
\param bit target device hardware reset pin status:
|
|
||||||
- 0: issue a device hardware reset.
|
|
||||||
- 1: release device hardware reset.
|
|
||||||
*/
|
|
||||||
__STATIC_FORCEINLINE void PIN_nRESET_OUT (uint32_t bit) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
///@}
|
|
||||||
|
|
||||||
|
|
||||||
//**************************************************************************************************
|
|
||||||
/**
|
|
||||||
\defgroup DAP_Config_LEDs_gr CMSIS-DAP Hardware Status LEDs
|
|
||||||
\ingroup DAP_ConfigIO_gr
|
|
||||||
@{
|
|
||||||
|
|
||||||
CMSIS-DAP Hardware may provide LEDs that indicate the status of the CMSIS-DAP Debug Unit.
|
|
||||||
|
|
||||||
It is recommended to provide the following LEDs for status indication:
|
|
||||||
- Connect LED: is active when the DAP hardware is connected to a debugger.
|
|
||||||
- Running LED: is active when the debugger has put the target device into running state.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** Debug Unit: Set status of Connected LED.
|
|
||||||
\param bit status of the Connect LED.
|
|
||||||
- 1: Connect LED ON: debugger is connected to CMSIS-DAP Debug Unit.
|
|
||||||
- 0: Connect LED OFF: debugger is not connected to CMSIS-DAP Debug Unit.
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void LED_CONNECTED_OUT (uint32_t bit) {}
|
|
||||||
|
|
||||||
/** Debug Unit: Set status Target Running LED.
|
|
||||||
\param bit status of the Target Running LED.
|
|
||||||
- 1: Target Running LED ON: program execution in target started.
|
|
||||||
- 0: Target Running LED OFF: program execution in target stopped.
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void LED_RUNNING_OUT (uint32_t bit) {}
|
|
||||||
|
|
||||||
///@}
|
|
||||||
|
|
||||||
|
|
||||||
//**************************************************************************************************
|
|
||||||
/**
|
|
||||||
\defgroup DAP_Config_Timestamp_gr CMSIS-DAP Timestamp
|
|
||||||
\ingroup DAP_ConfigIO_gr
|
|
||||||
@{
|
|
||||||
Access function for Test Domain Timer.
|
|
||||||
|
|
||||||
The value of the Test Domain Timer in the Debug Unit is returned by the function \ref TIMESTAMP_GET. By
|
|
||||||
default, the DWT timer is used. The frequency of this timer is configured with \ref TIMESTAMP_CLOCK.
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** Get timestamp of Test Domain Timer.
|
|
||||||
\return Current timestamp value.
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint32_t TIMESTAMP_GET (void) {
|
|
||||||
return (DWT->CYCCNT);
|
|
||||||
}
|
|
||||||
|
|
||||||
///@}
|
|
||||||
|
|
||||||
|
|
||||||
//**************************************************************************************************
|
|
||||||
/**
|
|
||||||
\defgroup DAP_Config_Initialization_gr CMSIS-DAP Initialization
|
|
||||||
\ingroup DAP_ConfigIO_gr
|
|
||||||
@{
|
|
||||||
|
|
||||||
CMSIS-DAP Hardware I/O and LED Pins are initialized with the function \ref DAP_SETUP.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** Setup of the Debug Unit I/O pins and LEDs (called when Debug Unit is initialized).
|
|
||||||
This function performs the initialization of the CMSIS-DAP Hardware I/O Pins and the
|
|
||||||
Status LEDs. In detail the operation of Hardware I/O and LED pins are enabled and set:
|
|
||||||
- I/O clock system enabled.
|
|
||||||
- all I/O pins: input buffer enabled, output pins are set to HighZ mode.
|
|
||||||
- for nTRST, nRESET a weak pull-up (if available) is enabled.
|
|
||||||
- LED output pins are enabled and LEDs are turned off.
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE void DAP_SETUP (void) {
|
|
||||||
;
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Reset Target Device with custom specific I/O pin or command sequence.
|
|
||||||
This function allows the optional implementation of a device specific reset sequence.
|
|
||||||
It is called when the command \ref DAP_ResetTarget and is for example required
|
|
||||||
when a device needs a time-critical unlock sequence that enables the debug port.
|
|
||||||
\return 0 = no device specific reset sequence is implemented.\n
|
|
||||||
1 = a device specific reset sequence is implemented.
|
|
||||||
*/
|
|
||||||
__STATIC_INLINE uint8_t RESET_TARGET (void) {
|
|
||||||
return (0U); // change to '1' when a device reset sequence is implemented
|
|
||||||
}
|
|
||||||
|
|
||||||
///@}
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* __DAP_CONFIG_H__ */
|
|
||||||
@ -1,367 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2013-2022 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
* ----------------------------------------------------------------------
|
|
||||||
*
|
|
||||||
* $Date: 26. April 2022
|
|
||||||
* $Revision: V2.1.1
|
|
||||||
*
|
|
||||||
* Project: CMSIS-DAP Include
|
|
||||||
* Title: DAP.h Definitions
|
|
||||||
*
|
|
||||||
*---------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
#ifndef __DAP_H__
|
|
||||||
#define __DAP_H__
|
|
||||||
|
|
||||||
|
|
||||||
// DAP Firmware Version: "1.3.0" when DAP_FW_V1 is defined, "2.1.1" otherwise
#if defined(DAP_FW_V1)
#define DAP_FW_VER "1.3.0"
#else
#define DAP_FW_VER "2.1.1"
#endif
|
|
||||||
|
|
||||||
// DAP Command IDs (first byte of every request)

// General commands
#define ID_DAP_Info               0x00U
#define ID_DAP_HostStatus         0x01U
#define ID_DAP_Connect            0x02U
#define ID_DAP_Disconnect         0x03U
#define ID_DAP_TransferConfigure  0x04U
#define ID_DAP_Transfer           0x05U
#define ID_DAP_TransferBlock      0x06U
#define ID_DAP_TransferAbort      0x07U
#define ID_DAP_WriteABORT         0x08U
#define ID_DAP_Delay              0x09U
#define ID_DAP_ResetTarget        0x0AU

// SWJ / SWD commands
#define ID_DAP_SWJ_Pins           0x10U
#define ID_DAP_SWJ_Clock          0x11U
#define ID_DAP_SWJ_Sequence       0x12U
#define ID_DAP_SWD_Configure      0x13U
#define ID_DAP_SWD_Sequence       0x1DU

// JTAG commands
#define ID_DAP_JTAG_Sequence      0x14U
#define ID_DAP_JTAG_Configure     0x15U
#define ID_DAP_JTAG_IDCODE        0x16U

// SWO commands
#define ID_DAP_SWO_Transport      0x17U
#define ID_DAP_SWO_Mode           0x18U
#define ID_DAP_SWO_Baudrate       0x19U
#define ID_DAP_SWO_Control        0x1AU
#define ID_DAP_SWO_Status         0x1BU
#define ID_DAP_SWO_ExtendedStatus 0x1EU
#define ID_DAP_SWO_Data           0x1CU

// UART commands
#define ID_DAP_UART_Transport     0x1FU
#define ID_DAP_UART_Configure     0x20U
#define ID_DAP_UART_Control       0x22U
#define ID_DAP_UART_Status        0x23U
#define ID_DAP_UART_Transfer      0x21U

// Command queueing / batch execution
#define ID_DAP_QueueCommands      0x7EU
#define ID_DAP_ExecuteCommands    0x7FU
|
|
||||||
|
|
||||||
// DAP Vendor Command IDs: 32 consecutive values, 0x80 .. 0x9F
#define ID_DAP_Vendor0   0x80U
#define ID_DAP_Vendor1   0x81U
#define ID_DAP_Vendor2   0x82U
#define ID_DAP_Vendor3   0x83U
#define ID_DAP_Vendor4   0x84U
#define ID_DAP_Vendor5   0x85U
#define ID_DAP_Vendor6   0x86U
#define ID_DAP_Vendor7   0x87U
#define ID_DAP_Vendor8   0x88U
#define ID_DAP_Vendor9   0x89U
#define ID_DAP_Vendor10  0x8AU
#define ID_DAP_Vendor11  0x8BU
#define ID_DAP_Vendor12  0x8CU
#define ID_DAP_Vendor13  0x8DU
#define ID_DAP_Vendor14  0x8EU
#define ID_DAP_Vendor15  0x8FU
#define ID_DAP_Vendor16  0x90U
#define ID_DAP_Vendor17  0x91U
#define ID_DAP_Vendor18  0x92U
#define ID_DAP_Vendor19  0x93U
#define ID_DAP_Vendor20  0x94U
#define ID_DAP_Vendor21  0x95U
#define ID_DAP_Vendor22  0x96U
#define ID_DAP_Vendor23  0x97U
#define ID_DAP_Vendor24  0x98U
#define ID_DAP_Vendor25  0x99U
#define ID_DAP_Vendor26  0x9AU
#define ID_DAP_Vendor27  0x9BU
#define ID_DAP_Vendor28  0x9CU
#define ID_DAP_Vendor29  0x9DU
#define ID_DAP_Vendor30  0x9EU
#define ID_DAP_Vendor31  0x9FU

// Marker for an invalid command ID
#define ID_DAP_Invalid   0xFFU
|
|
||||||
|
|
||||||
// DAP Status Code
#define DAP_OK      0U
#define DAP_ERROR   0xFFU

// DAP ID (selector values for the information items below)
#define DAP_ID_VENDOR               1U
#define DAP_ID_PRODUCT              2U
#define DAP_ID_SER_NUM              3U
#define DAP_ID_DAP_FW_VER           4U
#define DAP_ID_DEVICE_VENDOR        5U
#define DAP_ID_DEVICE_NAME          6U
#define DAP_ID_BOARD_VENDOR         7U
#define DAP_ID_BOARD_NAME           8U
#define DAP_ID_PRODUCT_FW_VER       9U
#define DAP_ID_CAPABILITIES         0xF0U
#define DAP_ID_TIMESTAMP_CLOCK      0xF1U
#define DAP_ID_UART_RX_BUFFER_SIZE  0xFBU
#define DAP_ID_UART_TX_BUFFER_SIZE  0xFCU
#define DAP_ID_SWO_BUFFER_SIZE      0xFDU
#define DAP_ID_PACKET_COUNT         0xFEU
#define DAP_ID_PACKET_SIZE          0xFFU
|
|
||||||
|
|
||||||
// DAP Host Status
#define DAP_DEBUGGER_CONNECTED  0U
#define DAP_TARGET_RUNNING      1U

// DAP Port (AUTODETECT and DISABLED share the value 0)
#define DAP_PORT_AUTODETECT     0U  // Autodetect Port
#define DAP_PORT_DISABLED       0U  // Port Disabled (I/O pins in High-Z)
#define DAP_PORT_SWD            1U  // SWD Port (SWCLK, SWDIO) + nRESET
#define DAP_PORT_JTAG           2U  // JTAG Port (TCK, TMS, TDI, TDO, nTRST) + nRESET

// DAP SWJ Pins
#define DAP_SWJ_SWCLK_TCK       0   // SWCLK/TCK
#define DAP_SWJ_SWDIO_TMS       1   // SWDIO/TMS
#define DAP_SWJ_TDI             2   // TDI
#define DAP_SWJ_TDO             3   // TDO
#define DAP_SWJ_nTRST           5   // nTRST
#define DAP_SWJ_nRESET          7   // nRESET
|
|
||||||
|
|
||||||
// DAP Transfer Request (bit flags)
#define DAP_TRANSFER_APnDP        (1U << 0)
#define DAP_TRANSFER_RnW          (1U << 1)
#define DAP_TRANSFER_A2           (1U << 2)
#define DAP_TRANSFER_A3           (1U << 3)
#define DAP_TRANSFER_MATCH_VALUE  (1U << 4)
#define DAP_TRANSFER_MATCH_MASK   (1U << 5)
#define DAP_TRANSFER_TIMESTAMP    (1U << 7)

// DAP Transfer Response (bit flags)
#define DAP_TRANSFER_OK           (1U << 0)
#define DAP_TRANSFER_WAIT         (1U << 1)
#define DAP_TRANSFER_FAULT        (1U << 2)
#define DAP_TRANSFER_ERROR        (1U << 3)
#define DAP_TRANSFER_MISMATCH     (1U << 4)

// DAP SWO Trace Mode
#define DAP_SWO_OFF               0U
#define DAP_SWO_UART              1U
#define DAP_SWO_MANCHESTER        2U

// DAP SWO Trace Status (bit flags)
#define DAP_SWO_CAPTURE_ACTIVE    (1U << 0)
#define DAP_SWO_CAPTURE_PAUSED    (1U << 1)
#define DAP_SWO_STREAM_ERROR      (1U << 6)
#define DAP_SWO_BUFFER_OVERRUN    (1U << 7)
|
|
||||||
|
|
||||||
// DAP UART Transport
#define DAP_UART_TRANSPORT_NONE          0U
#define DAP_UART_TRANSPORT_USB_COM_PORT  1U
#define DAP_UART_TRANSPORT_DAP_COMMAND   2U

// DAP UART Control (bit flags)
#define DAP_UART_CONTROL_RX_ENABLE       (1U << 0)
#define DAP_UART_CONTROL_RX_DISABLE      (1U << 1)
#define DAP_UART_CONTROL_RX_BUF_FLUSH    (1U << 2)
#define DAP_UART_CONTROL_TX_ENABLE       (1U << 4)
#define DAP_UART_CONTROL_TX_DISABLE      (1U << 5)
#define DAP_UART_CONTROL_TX_BUF_FLUSH    (1U << 6)

// DAP UART Status (bit flags)
#define DAP_UART_STATUS_RX_ENABLED       (1U << 0)
#define DAP_UART_STATUS_RX_DATA_LOST     (1U << 1)
#define DAP_UART_STATUS_FRAMING_ERROR    (1U << 2)
#define DAP_UART_STATUS_PARITY_ERROR     (1U << 3)
#define DAP_UART_STATUS_TX_ENABLED       (1U << 4)

// DAP UART Configure Error (bit flags)
#define DAP_UART_CFG_ERROR_DATA_BITS     (1U << 0)
#define DAP_UART_CFG_ERROR_PARITY        (1U << 1)
#define DAP_UART_CFG_ERROR_STOP_BITS     (1U << 2)
|
|
||||||
|
|
||||||
// Debug Port Register Addresses (several registers share an address and are
// distinguished by access direction / transport, as noted per line)
#define DP_IDCODE           0x00U   // IDCODE Register (SW Read only)
#define DP_ABORT            0x00U   // Abort Register (SW Write only)
#define DP_CTRL_STAT        0x04U   // Control & Status
#define DP_WCR              0x04U   // Wire Control Register (SW Only)
#define DP_SELECT           0x08U   // Select Register (JTAG R/W & SW W)
#define DP_RESEND           0x08U   // Resend (SW Read Only)
#define DP_RDBUFF           0x0CU   // Read Buffer (Read Only)

// JTAG IR Codes
#define JTAG_ABORT          0x08U
#define JTAG_DPACC          0x0AU
#define JTAG_APACC          0x0BU
#define JTAG_IDCODE         0x0EU
#define JTAG_BYPASS         0x0FU

// JTAG Sequence Info
#define JTAG_SEQUENCE_TCK   0x3FU   // TCK count
#define JTAG_SEQUENCE_TMS   0x40U   // TMS value
#define JTAG_SEQUENCE_TDO   0x80U   // TDO capture

// SWD Sequence Info
#define SWD_SEQUENCE_CLK    0x3FU   // SWCLK count
#define SWD_SEQUENCE_DIN    0x80U   // SWDIO capture
|
|
||||||
|
|
||||||
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdint.h>
|
|
||||||
#include "cmsis_compiler.h"
|
|
||||||
|
|
||||||
// DAP Data structure
// The swd_conf and jtag_dev members exist only when DAP_SWD / DAP_JTAG are
// non-zero, so the layout depends on the build configuration.
typedef struct {
  uint8_t   debug_port;                   // Debug Port
  uint8_t   fast_clock;                   // Fast Clock Flag
  uint8_t   padding[2];                   // explicit padding bytes
  uint32_t  clock_delay;                  // Clock Delay
  uint32_t  timestamp;                    // Last captured Timestamp

  // Transfer Configuration
  struct {
    uint8_t   idle_cycles;                // Idle cycles after transfer
    uint8_t   padding[3];                 // explicit padding bytes
    uint16_t  retry_count;                // Number of retries after WAIT response
    uint16_t  match_retry;                // Number of retries if read value does not match
    uint32_t  match_mask;                 // Match Mask
  } transfer;

#if (DAP_SWD != 0)
  // SWD Configuration
  struct {
    uint8_t   turnaround;                 // Turnaround period
    uint8_t   data_phase;                 // Always generate Data Phase
  } swd_conf;
#endif

#if (DAP_JTAG != 0)
  // JTAG Device Chain
  struct {
    uint8_t   count;                      // Number of devices
    uint8_t   index;                      // Device index (device at TDO has index 0)
#if (DAP_JTAG_DEV_CNT != 0)
    uint8_t   ir_length[DAP_JTAG_DEV_CNT];  // IR Length in bits
    uint16_t  ir_before[DAP_JTAG_DEV_CNT];  // Bits before IR
    uint16_t  ir_after [DAP_JTAG_DEV_CNT];  // Bits after IR
#endif
  } jtag_dev;
#endif
} DAP_Data_t;

extern          DAP_Data_t DAP_Data;            // DAP Data
extern volatile uint8_t    DAP_TransferAbort;   // Transfer Abort Flag
|
|
||||||
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C"
|
|
||||||
{
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Functions

// Sequence generation and DP/AP transfers
extern void     SWJ_Sequence(uint32_t count, const uint8_t *data);
extern void     SWD_Sequence(uint32_t info, const uint8_t *swdo, uint8_t *swdi);
extern void     JTAG_Sequence(uint32_t info, const uint8_t *tdi, uint8_t *tdo);
extern void     JTAG_IR(uint32_t ir);
extern uint32_t JTAG_ReadIDCode(void);
extern void     JTAG_WriteAbort(uint32_t data);
extern uint8_t  JTAG_Transfer(uint32_t request, uint32_t *data);
extern uint8_t  SWD_Transfer(uint32_t request, uint32_t *data);

// Delay
extern void     Delayms(uint32_t delay);

// SWO request/response handlers
extern uint32_t SWO_Transport(const uint8_t *request, uint8_t *response);
extern uint32_t SWO_Mode(const uint8_t *request, uint8_t *response);
extern uint32_t SWO_Baudrate(const uint8_t *request, uint8_t *response);
extern uint32_t SWO_Control(const uint8_t *request, uint8_t *response);
extern uint32_t SWO_Status(uint8_t *response);
extern uint32_t SWO_ExtendedStatus(const uint8_t *request, uint8_t *response);
extern uint32_t SWO_Data(const uint8_t *request, uint8_t *response);

// SWO transfer management
extern void     SWO_QueueTransfer(uint8_t *buf, uint32_t num);
extern void     SWO_AbortTransfer(void);
extern void     SWO_TransferComplete(void);

// SWO UART mode
extern uint32_t SWO_Mode_UART(uint32_t enable);
extern uint32_t SWO_Baudrate_UART(uint32_t baudrate);
extern uint32_t SWO_Control_UART(uint32_t active);
extern void     SWO_Capture_UART(uint8_t *buf, uint32_t num);
extern uint32_t SWO_GetCount_UART(void);

// SWO Manchester mode
extern uint32_t SWO_Mode_Manchester(uint32_t enable);
extern uint32_t SWO_Baudrate_Manchester(uint32_t baudrate);
extern uint32_t SWO_Control_Manchester(uint32_t active);
extern void     SWO_Capture_Manchester(uint8_t *buf, uint32_t num);
extern uint32_t SWO_GetCount_Manchester(void);

// UART request/response handlers
extern uint32_t UART_Transport(const uint8_t *request, uint8_t *response);
extern uint32_t UART_Configure(const uint8_t *request, uint8_t *response);
extern uint32_t UART_Control(const uint8_t *request, uint8_t *response);
extern uint32_t UART_Status(uint8_t *response);
extern uint32_t UART_Transfer(const uint8_t *request, uint8_t *response);

// USB COM port
extern uint8_t  USB_COM_PORT_Activate(uint32_t cmd);

// Command processing
extern uint32_t DAP_ProcessVendorCommand(const uint8_t *request, uint8_t *response);
extern uint32_t DAP_ProcessCommand(const uint8_t *request, uint8_t *response);
extern uint32_t DAP_ExecuteCommand(const uint8_t *request, uint8_t *response);

// Initialization
extern void     DAP_Setup(void);
|
|
||||||
|
|
||||||
// Configurable delay for clock generation
|
|
||||||
#ifndef DELAY_SLOW_CYCLES
|
|
||||||
#define DELAY_SLOW_CYCLES 3U // Number of cycles for one iteration
|
|
||||||
#endif
|
|
||||||
#if defined(__CC_ARM)
|
|
||||||
__STATIC_FORCEINLINE void PIN_DELAY_SLOW (uint32_t delay) {
|
|
||||||
uint32_t count = delay;
|
|
||||||
while (--count);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
__STATIC_FORCEINLINE void PIN_DELAY_SLOW (uint32_t delay) {
|
|
||||||
__ASM volatile (
|
|
||||||
".syntax unified\n"
|
|
||||||
"0:\n\t"
|
|
||||||
"subs %0,%0,#1\n\t"
|
|
||||||
"bne 0b\n"
|
|
||||||
: "+l" (delay) : : "cc"
|
|
||||||
);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Fixed delay for fast clock generation
|
|
||||||
#ifndef DELAY_FAST_CYCLES
|
|
||||||
#define DELAY_FAST_CYCLES 0U // Number of cycles: 0..3
|
|
||||||
#endif
|
|
||||||
__STATIC_FORCEINLINE void PIN_DELAY_FAST (void) {
|
|
||||||
#if (DELAY_FAST_CYCLES >= 1U)
|
|
||||||
__NOP();
|
|
||||||
#endif
|
|
||||||
#if (DELAY_FAST_CYCLES >= 2U)
|
|
||||||
__NOP();
|
|
||||||
#endif
|
|
||||||
#if (DELAY_FAST_CYCLES >= 3U)
|
|
||||||
__NOP();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* __DAP_H__ */
|
|
||||||
File diff suppressed because it is too large
Load Diff
@ -1,100 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2013-2017 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
* ----------------------------------------------------------------------
|
|
||||||
*
|
|
||||||
* $Date: 1. December 2017
|
|
||||||
* $Revision: V2.0.0
|
|
||||||
*
|
|
||||||
* Project: CMSIS-DAP Source
|
|
||||||
* Title: DAP_vendor.c CMSIS-DAP Vendor Commands
|
|
||||||
*
|
|
||||||
*---------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
#include "DAP_config.h"
|
|
||||||
#include "DAP.h"
|
|
||||||
|
|
||||||
//**************************************************************************************************
|
|
||||||
/**
|
|
||||||
\defgroup DAP_Vendor_Adapt_gr Adapt Vendor Commands
|
|
||||||
\ingroup DAP_Vendor_gr
|
|
||||||
@{
|
|
||||||
|
|
||||||
The file DAP_vendor.c provides template source code for extension of a Debug Unit with
|
|
||||||
Vendor Commands. Copy this file to the project folder of the Debug Unit and add the
|
|
||||||
file to the MDK-ARM project under the file group Configuration.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/** Process DAP Vendor Command and prepare Response Data
|
|
||||||
\param request pointer to request data
|
|
||||||
\param response pointer to response data
|
|
||||||
\return number of bytes in response (lower 16 bits)
|
|
||||||
number of bytes in request (upper 16 bits)
|
|
||||||
*/
|
|
||||||
uint32_t DAP_ProcessVendorCommand(const uint8_t *request, uint8_t *response) {
|
|
||||||
uint32_t num = (1U << 16) | 1U;
|
|
||||||
|
|
||||||
*response++ = *request; // copy Command ID
|
|
||||||
|
|
||||||
switch (*request++) { // first byte in request is Command ID
|
|
||||||
case ID_DAP_Vendor0:
|
|
||||||
#if 0 // example user command
|
|
||||||
num += 1U << 16; // increment request count
|
|
||||||
if (*request == 1U) { // when first command data byte is 1
|
|
||||||
*response++ = 'X'; // send 'X' as response
|
|
||||||
num++; // increment response count
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
|
|
||||||
case ID_DAP_Vendor1: break;
|
|
||||||
case ID_DAP_Vendor2: break;
|
|
||||||
case ID_DAP_Vendor3: break;
|
|
||||||
case ID_DAP_Vendor4: break;
|
|
||||||
case ID_DAP_Vendor5: break;
|
|
||||||
case ID_DAP_Vendor6: break;
|
|
||||||
case ID_DAP_Vendor7: break;
|
|
||||||
case ID_DAP_Vendor8: break;
|
|
||||||
case ID_DAP_Vendor9: break;
|
|
||||||
case ID_DAP_Vendor10: break;
|
|
||||||
case ID_DAP_Vendor11: break;
|
|
||||||
case ID_DAP_Vendor12: break;
|
|
||||||
case ID_DAP_Vendor13: break;
|
|
||||||
case ID_DAP_Vendor14: break;
|
|
||||||
case ID_DAP_Vendor15: break;
|
|
||||||
case ID_DAP_Vendor16: break;
|
|
||||||
case ID_DAP_Vendor17: break;
|
|
||||||
case ID_DAP_Vendor18: break;
|
|
||||||
case ID_DAP_Vendor19: break;
|
|
||||||
case ID_DAP_Vendor20: break;
|
|
||||||
case ID_DAP_Vendor21: break;
|
|
||||||
case ID_DAP_Vendor22: break;
|
|
||||||
case ID_DAP_Vendor23: break;
|
|
||||||
case ID_DAP_Vendor24: break;
|
|
||||||
case ID_DAP_Vendor25: break;
|
|
||||||
case ID_DAP_Vendor26: break;
|
|
||||||
case ID_DAP_Vendor27: break;
|
|
||||||
case ID_DAP_Vendor28: break;
|
|
||||||
case ID_DAP_Vendor29: break;
|
|
||||||
case ID_DAP_Vendor30: break;
|
|
||||||
case ID_DAP_Vendor31: break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (num);
|
|
||||||
}
|
|
||||||
|
|
||||||
///@}
|
|
||||||
@ -1,370 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2013-2017 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
* ----------------------------------------------------------------------
|
|
||||||
*
|
|
||||||
* $Date: 1. December 2017
|
|
||||||
* $Revision: V2.0.0
|
|
||||||
*
|
|
||||||
* Project: CMSIS-DAP Source
|
|
||||||
* Title: JTAG_DP.c CMSIS-DAP JTAG DP I/O
|
|
||||||
*
|
|
||||||
*---------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
#include "DAP_config.h"
|
|
||||||
#include "DAP.h"
|
|
||||||
|
|
||||||
|
|
||||||
// JTAG Macros
//   Every JTAG_CYCLE_* macro clears TCK, waits PIN_DELAY(), sets TCK and waits
//   PIN_DELAY() again. The bodies are wrapped in do { } while (0) so that each
//   invocation is exactly one statement, even inside an unbraced if/else or
//   loop body. PIN_DELAY() is expanded where a cycle macro is used, so it can
//   be redefined later in the file to build fast and slow function variants.

#define PIN_TCK_SET PIN_SWCLK_TCK_SET
#define PIN_TCK_CLR PIN_SWCLK_TCK_CLR
#define PIN_TMS_SET PIN_SWDIO_TMS_SET
#define PIN_TMS_CLR PIN_SWDIO_TMS_CLR

// One TCK cycle; TDI is left unchanged and TDO is not sampled
#define JTAG_CYCLE_TCK()                \
  do {                                  \
    PIN_TCK_CLR();                      \
    PIN_DELAY();                        \
    PIN_TCK_SET();                      \
    PIN_DELAY();                        \
  } while (0)

// One TCK cycle; 'tdi' is passed to PIN_TDI_OUT() before TCK is cleared
#define JTAG_CYCLE_TDI(tdi)             \
  do {                                  \
    PIN_TDI_OUT(tdi);                   \
    PIN_TCK_CLR();                      \
    PIN_DELAY();                        \
    PIN_TCK_SET();                      \
    PIN_DELAY();                        \
  } while (0)

// One TCK cycle; 'tdo' receives PIN_TDO_IN() sampled just before TCK is set
#define JTAG_CYCLE_TDO(tdo)             \
  do {                                  \
    PIN_TCK_CLR();                      \
    PIN_DELAY();                        \
    (tdo) = PIN_TDO_IN();               \
    PIN_TCK_SET();                      \
    PIN_DELAY();                        \
  } while (0)

// One TCK cycle; drives 'tdi' and captures 'tdo' in the same cycle
#define JTAG_CYCLE_TDIO(tdi,tdo)        \
  do {                                  \
    PIN_TDI_OUT(tdi);                   \
    PIN_TCK_CLR();                      \
    PIN_DELAY();                        \
    (tdo) = PIN_TDO_IN();               \
    PIN_TCK_SET();                      \
    PIN_DELAY();                        \
  } while (0)

// Default delay: slow variant, parameterized by the configured clock delay
#define PIN_DELAY() PIN_DELAY_SLOW(DAP_Data.clock_delay)
|
|
||||||
|
|
||||||
|
|
||||||
#if (DAP_JTAG != 0)
|
|
||||||
|
|
||||||
|
|
||||||
// Generate JTAG Sequence
|
|
||||||
// info: sequence information
|
|
||||||
// tdi: pointer to TDI generated data
|
|
||||||
// tdo: pointer to TDO captured data
|
|
||||||
// return: none
|
|
||||||
void JTAG_Sequence (uint32_t info, const uint8_t *tdi, uint8_t *tdo) {
|
|
||||||
uint32_t i_val;
|
|
||||||
uint32_t o_val;
|
|
||||||
uint32_t bit;
|
|
||||||
uint32_t n, k;
|
|
||||||
|
|
||||||
n = info & JTAG_SEQUENCE_TCK;
|
|
||||||
if (n == 0U) {
|
|
||||||
n = 64U;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (info & JTAG_SEQUENCE_TMS) {
|
|
||||||
PIN_TMS_SET();
|
|
||||||
} else {
|
|
||||||
PIN_TMS_CLR();
|
|
||||||
}
|
|
||||||
|
|
||||||
while (n) {
|
|
||||||
i_val = *tdi++;
|
|
||||||
o_val = 0U;
|
|
||||||
for (k = 8U; k && n; k--, n--) {
|
|
||||||
JTAG_CYCLE_TDIO(i_val, bit);
|
|
||||||
i_val >>= 1;
|
|
||||||
o_val >>= 1;
|
|
||||||
o_val |= bit << 7;
|
|
||||||
}
|
|
||||||
o_val >>= k;
|
|
||||||
if (info & JTAG_SEQUENCE_TDO) {
|
|
||||||
*tdo++ = (uint8_t)o_val;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// JTAG Set IR (function template)
//   speed:  suffix of the generated function; JTAG_IR_Function(X) defines
//             static void JTAG_IR_X (uint32_t ir)
//   ir:     IR value; shifted right by one after every TDI cycle
//   return: none
//   The delay used by the generated function is the PIN_DELAY() definition
//   in effect at the point where the template is instantiated.
#define JTAG_IR_Function(speed) /**/                                            \
static void JTAG_IR_##speed (uint32_t ir) {                                     \
  uint32_t cycles;                                                              \
                                                                                \
  PIN_TMS_SET();                                                                \
  JTAG_CYCLE_TCK();                         /* Select-DR-Scan */                \
  JTAG_CYCLE_TCK();                         /* Select-IR-Scan */                \
  PIN_TMS_CLR();                                                                \
  JTAG_CYCLE_TCK();                         /* Capture-IR */                    \
  JTAG_CYCLE_TCK();                         /* Shift-IR */                      \
                                                                                \
  PIN_TDI_OUT(1U);                                                              \
  cycles = DAP_Data.jtag_dev.ir_before[DAP_Data.jtag_dev.index];                \
  while (cycles != 0U) {                                                        \
    JTAG_CYCLE_TCK();                       /* Bypass before data */            \
    cycles--;                                                                   \
  }                                                                             \
  cycles = DAP_Data.jtag_dev.ir_length[DAP_Data.jtag_dev.index] - 1U;           \
  while (cycles != 0U) {                                                        \
    JTAG_CYCLE_TDI(ir);                     /* Set IR bits (except last) */     \
    ir >>= 1;                                                                   \
    cycles--;                                                                   \
  }                                                                             \
  cycles = DAP_Data.jtag_dev.ir_after[DAP_Data.jtag_dev.index];                 \
  if (cycles != 0U) {                                                           \
    JTAG_CYCLE_TDI(ir);                     /* Set last IR bit */               \
    PIN_TDI_OUT(1U);                                                            \
    cycles--;                                                                   \
    while (cycles != 0U) {                                                      \
      JTAG_CYCLE_TCK();                     /* Bypass after data */             \
      cycles--;                                                                 \
    }                                                                           \
    PIN_TMS_SET();                                                              \
    JTAG_CYCLE_TCK();                       /* Bypass & Exit1-IR */             \
  } else {                                                                      \
    PIN_TMS_SET();                                                              \
    JTAG_CYCLE_TDI(ir);                     /* Set last IR bit & Exit1-IR */    \
  }                                                                             \
                                                                                \
  JTAG_CYCLE_TCK();                         /* Update-IR */                     \
  PIN_TMS_CLR();                                                                \
  JTAG_CYCLE_TCK();                         /* Idle */                          \
  PIN_TDI_OUT(1U);                                                              \
}
|
|
||||||
|
|
||||||
|
|
||||||
// JTAG Transfer I/O (function template)
//   speed:   suffix of the generated function; JTAG_TransferFunction(X) defines
//              static uint8_t JTAG_TransferX (uint32_t request, uint32_t *data)
//   request: A[3:2] RnW APnDP (DAP_TRANSFER_TIMESTAMP additionally requests a
//            timestamp capture after the transfer)
//   data:    DATA[31:0]; written on a read when not NULL, read on a write
//   return:  ACK[2:0]
//   The delay used by the generated function is the PIN_DELAY() definition
//   in effect at the point where the template is instantiated.
#define JTAG_TransferFunction(speed) /**/                                       \
static uint8_t JTAG_Transfer##speed (uint32_t request, uint32_t *data) {        \
  uint32_t ack;                                                                 \
  uint32_t tdo_bit;                                                             \
  uint32_t word;                                                                \
  uint32_t cnt;                                                                 \
                                                                                \
  PIN_TMS_SET();                                                                \
  JTAG_CYCLE_TCK();                         /* Select-DR-Scan */                \
  PIN_TMS_CLR();                                                                \
  JTAG_CYCLE_TCK();                         /* Capture-DR */                    \
  JTAG_CYCLE_TCK();                         /* Shift-DR */                      \
                                                                                \
  cnt = DAP_Data.jtag_dev.index;                                                \
  while (cnt != 0U) {                                                           \
    JTAG_CYCLE_TCK();                       /* Bypass before data */            \
    cnt--;                                                                      \
  }                                                                             \
                                                                                \
  JTAG_CYCLE_TDIO(request >> 1, tdo_bit);   /* Set RnW, Get ACK.0 */            \
  ack  = tdo_bit << 1;                                                          \
  JTAG_CYCLE_TDIO(request >> 2, tdo_bit);   /* Set A2,  Get ACK.1 */            \
  ack |= tdo_bit << 0;                                                          \
  JTAG_CYCLE_TDIO(request >> 3, tdo_bit);   /* Set A3,  Get ACK.2 */            \
  ack |= tdo_bit << 2;                                                          \
                                                                                \
  if (ack != DAP_TRANSFER_OK) {                                                 \
    /* Exit on error */                                                         \
    PIN_TMS_SET();                                                              \
    JTAG_CYCLE_TCK();                       /* Exit1-DR */                      \
    goto exit;                                                                  \
  }                                                                             \
                                                                                \
  if ((request & DAP_TRANSFER_RnW) != 0U) {                                     \
    /* Read Transfer */                                                         \
    word = 0U;                                                                  \
    for (cnt = 0U; cnt < 31U; cnt++) {                                          \
      JTAG_CYCLE_TDO(tdo_bit);              /* Get D0..D30 */                   \
      word = (word | (tdo_bit << 31)) >> 1;                                     \
    }                                                                           \
    cnt = DAP_Data.jtag_dev.count - DAP_Data.jtag_dev.index - 1U;               \
    if (cnt != 0U) {                                                            \
      JTAG_CYCLE_TDO(tdo_bit);              /* Get D31 */                       \
      cnt--;                                                                    \
      while (cnt != 0U) {                                                       \
        JTAG_CYCLE_TCK();                   /* Bypass after data */             \
        cnt--;                                                                  \
      }                                                                         \
      PIN_TMS_SET();                                                            \
      JTAG_CYCLE_TCK();                     /* Bypass & Exit1-DR */             \
    } else {                                                                    \
      PIN_TMS_SET();                                                            \
      JTAG_CYCLE_TDO(tdo_bit);              /* Get D31 & Exit1-DR */            \
    }                                                                           \
    word |= tdo_bit << 31;                                                      \
    if (data != NULL) {                                                         \
      *data = word;                                                             \
    }                                                                           \
  } else {                                                                      \
    /* Write Transfer */                                                        \
    word = *data;                                                               \
    for (cnt = 0U; cnt < 31U; cnt++) {                                          \
      JTAG_CYCLE_TDI(word);                 /* Set D0..D30 */                   \
      word >>= 1;                                                               \
    }                                                                           \
    cnt = DAP_Data.jtag_dev.count - DAP_Data.jtag_dev.index - 1U;               \
    if (cnt != 0U) {                                                            \
      JTAG_CYCLE_TDI(word);                 /* Set D31 */                       \
      cnt--;                                                                    \
      while (cnt != 0U) {                                                       \
        JTAG_CYCLE_TCK();                   /* Bypass after data */             \
        cnt--;                                                                  \
      }                                                                         \
      PIN_TMS_SET();                                                            \
      JTAG_CYCLE_TCK();                     /* Bypass & Exit1-DR */             \
    } else {                                                                    \
      PIN_TMS_SET();                                                            \
      JTAG_CYCLE_TDI(word);                 /* Set D31 & Exit1-DR */            \
    }                                                                           \
  }                                                                             \
                                                                                \
exit:                                                                           \
  JTAG_CYCLE_TCK();                         /* Update-DR */                     \
  PIN_TMS_CLR();                                                                \
  JTAG_CYCLE_TCK();                         /* Idle */                          \
  PIN_TDI_OUT(1U);                                                              \
                                                                                \
  /* Capture Timestamp */                                                       \
  if ((request & DAP_TRANSFER_TIMESTAMP) != 0U) {                               \
    DAP_Data.timestamp = TIMESTAMP_GET();                                       \
  }                                                                             \
                                                                                \
  /* Idle cycles */                                                             \
  for (cnt = DAP_Data.transfer.idle_cycles; cnt != 0U; cnt--) {                 \
    JTAG_CYCLE_TCK();                       /* Idle */                          \
  }                                                                             \
                                                                                \
  return ((uint8_t)ack);                                                        \
}
|
|
||||||
|
|
||||||
|
|
||||||
#undef PIN_DELAY
|
|
||||||
#define PIN_DELAY() PIN_DELAY_FAST()
|
|
||||||
JTAG_IR_Function(Fast)
|
|
||||||
JTAG_TransferFunction(Fast)
|
|
||||||
|
|
||||||
#undef PIN_DELAY
|
|
||||||
#define PIN_DELAY() PIN_DELAY_SLOW(DAP_Data.clock_delay)
|
|
||||||
JTAG_IR_Function(Slow)
|
|
||||||
JTAG_TransferFunction(Slow)
|
|
||||||
|
|
||||||
|
|
||||||
// JTAG Read IDCODE register
|
|
||||||
// return: value read
|
|
||||||
uint32_t JTAG_ReadIDCode (void) {
|
|
||||||
uint32_t bit;
|
|
||||||
uint32_t val;
|
|
||||||
uint32_t n;
|
|
||||||
|
|
||||||
PIN_TMS_SET();
|
|
||||||
JTAG_CYCLE_TCK(); /* Select-DR-Scan */
|
|
||||||
PIN_TMS_CLR();
|
|
||||||
JTAG_CYCLE_TCK(); /* Capture-DR */
|
|
||||||
JTAG_CYCLE_TCK(); /* Shift-DR */
|
|
||||||
|
|
||||||
for (n = DAP_Data.jtag_dev.index; n; n--) {
|
|
||||||
JTAG_CYCLE_TCK(); /* Bypass before data */
|
|
||||||
}
|
|
||||||
|
|
||||||
val = 0U;
|
|
||||||
for (n = 31U; n; n--) {
|
|
||||||
JTAG_CYCLE_TDO(bit); /* Get D0..D30 */
|
|
||||||
val |= bit << 31;
|
|
||||||
val >>= 1;
|
|
||||||
}
|
|
||||||
PIN_TMS_SET();
|
|
||||||
JTAG_CYCLE_TDO(bit); /* Get D31 & Exit1-DR */
|
|
||||||
val |= bit << 31;
|
|
||||||
|
|
||||||
JTAG_CYCLE_TCK(); /* Update-DR */
|
|
||||||
PIN_TMS_CLR();
|
|
||||||
JTAG_CYCLE_TCK(); /* Idle */
|
|
||||||
|
|
||||||
return (val);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// JTAG Write ABORT register
|
|
||||||
// data: value to write
|
|
||||||
// return: none
|
|
||||||
void JTAG_WriteAbort (uint32_t data) {
|
|
||||||
uint32_t n;
|
|
||||||
|
|
||||||
PIN_TMS_SET();
|
|
||||||
JTAG_CYCLE_TCK(); /* Select-DR-Scan */
|
|
||||||
PIN_TMS_CLR();
|
|
||||||
JTAG_CYCLE_TCK(); /* Capture-DR */
|
|
||||||
JTAG_CYCLE_TCK(); /* Shift-DR */
|
|
||||||
|
|
||||||
for (n = DAP_Data.jtag_dev.index; n; n--) {
|
|
||||||
JTAG_CYCLE_TCK(); /* Bypass before data */
|
|
||||||
}
|
|
||||||
|
|
||||||
PIN_TDI_OUT(0U);
|
|
||||||
JTAG_CYCLE_TCK(); /* Set RnW=0 (Write) */
|
|
||||||
JTAG_CYCLE_TCK(); /* Set A2=0 */
|
|
||||||
JTAG_CYCLE_TCK(); /* Set A3=0 */
|
|
||||||
|
|
||||||
for (n = 31U; n; n--) {
|
|
||||||
JTAG_CYCLE_TDI(data); /* Set D0..D30 */
|
|
||||||
data >>= 1;
|
|
||||||
}
|
|
||||||
n = DAP_Data.jtag_dev.count - DAP_Data.jtag_dev.index - 1U;
|
|
||||||
if (n) {
|
|
||||||
JTAG_CYCLE_TDI(data); /* Set D31 */
|
|
||||||
for (--n; n; n--) {
|
|
||||||
JTAG_CYCLE_TCK(); /* Bypass after data */
|
|
||||||
}
|
|
||||||
PIN_TMS_SET();
|
|
||||||
JTAG_CYCLE_TCK(); /* Bypass & Exit1-DR */
|
|
||||||
} else {
|
|
||||||
PIN_TMS_SET();
|
|
||||||
JTAG_CYCLE_TDI(data); /* Set D31 & Exit1-DR */
|
|
||||||
}
|
|
||||||
|
|
||||||
JTAG_CYCLE_TCK(); /* Update-DR */
|
|
||||||
PIN_TMS_CLR();
|
|
||||||
JTAG_CYCLE_TCK(); /* Idle */
|
|
||||||
PIN_TDI_OUT(1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// JTAG Set IR
|
|
||||||
// ir: IR value
|
|
||||||
// return: none
|
|
||||||
void JTAG_IR (uint32_t ir) {
|
|
||||||
if (DAP_Data.fast_clock) {
|
|
||||||
JTAG_IR_Fast(ir);
|
|
||||||
} else {
|
|
||||||
JTAG_IR_Slow(ir);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// JTAG Transfer I/O
|
|
||||||
// request: A[3:2] RnW APnDP
|
|
||||||
// data: DATA[31:0]
|
|
||||||
// return: ACK[2:0]
|
|
||||||
uint8_t JTAG_Transfer(uint32_t request, uint32_t *data) {
|
|
||||||
if (DAP_Data.fast_clock) {
|
|
||||||
return JTAG_TransferFast(request, data);
|
|
||||||
} else {
|
|
||||||
return JTAG_TransferSlow(request, data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* (DAP_JTAG != 0) */
|
|
||||||
@ -1,798 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2013-2021 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
* ----------------------------------------------------------------------
|
|
||||||
*
|
|
||||||
* $Date: 29. March 2021
|
|
||||||
* $Revision: V2.0.1
|
|
||||||
*
|
|
||||||
* Project: CMSIS-DAP Source
|
|
||||||
* Title: SWO.c CMSIS-DAP SWO I/O
|
|
||||||
*
|
|
||||||
*---------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
#include "DAP_config.h"
|
|
||||||
#include "DAP.h"
|
|
||||||
#if (SWO_UART != 0)
|
|
||||||
#include "Driver_USART.h"
|
|
||||||
#endif
|
|
||||||
#if (SWO_STREAM != 0)
|
|
||||||
#include "cmsis_os2.h"
|
|
||||||
#define osObjectsExternal
|
|
||||||
#include "osObjects.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if (SWO_STREAM != 0)
|
|
||||||
#ifdef DAP_FW_V1
|
|
||||||
#error "SWO Streaming Trace not supported in DAP V1!"
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if (SWO_UART != 0)

// USART Driver
//   USART_Driver_(SWO_UART_DRIVER) names the ARM_DRIVER_USART object
//   Driver_USART<n>. Two macro levels are required so that SWO_UART_DRIVER is
//   expanded before it is pasted. The inner helper deliberately avoids a
//   leading underscore followed by an uppercase letter, which is a reserved
//   identifier in C.
#define USART_DRIVER_PASTE_(n)  Driver_USART##n
#define USART_Driver_(n)        USART_DRIVER_PASTE_(n)
extern ARM_DRIVER_USART         USART_Driver_(SWO_UART_DRIVER);
#define pUSART                  (&USART_Driver_(SWO_UART_DRIVER))

// Set to 1 by SWO_Baudrate_UART after the USART accepted its configuration;
// cleared by SWO_Mode_UART and on a failed configuration.
static uint8_t USART_Ready = 0U;

#endif  /* (SWO_UART != 0) */
|
|
||||||
|
|
||||||
|
|
||||||
#if ((SWO_UART != 0) || (SWO_MANCHESTER != 0))
|
|
||||||
|
|
||||||
|
|
||||||
#define SWO_STREAM_TIMEOUT 50U /* Stream timeout in ms */
|
|
||||||
|
|
||||||
#define USB_BLOCK_SIZE 512U /* USB Block Size */
|
|
||||||
#define TRACE_BLOCK_SIZE 64U /* Trace Block Size (2^n: 32...512) */
|
|
||||||
|
|
||||||
// Trace State
|
|
||||||
static uint8_t TraceTransport = 0U; /* Trace Transport */
|
|
||||||
static uint8_t TraceMode = 0U; /* Trace Mode */
|
|
||||||
static uint8_t TraceStatus = 0U; /* Trace Status without Errors */
|
|
||||||
static uint8_t TraceError[2] = {0U, 0U}; /* Trace Error flags (banked) */
|
|
||||||
static uint8_t TraceError_n = 0U; /* Active Trace Error bank */
|
|
||||||
|
|
||||||
// Trace Buffer
|
|
||||||
static uint8_t TraceBuf[SWO_BUFFER_SIZE]; /* Trace Buffer (must be 2^n) */
|
|
||||||
static volatile uint32_t TraceIndexI = 0U; /* Incoming Trace Index */
|
|
||||||
static volatile uint32_t TraceIndexO = 0U; /* Outgoing Trace Index */
|
|
||||||
static volatile uint8_t TraceUpdate; /* Trace Update Flag */
|
|
||||||
static uint32_t TraceBlockSize; /* Current Trace Block Size */
|
|
||||||
|
|
||||||
#if (TIMESTAMP_CLOCK != 0U)
|
|
||||||
// Trace Timestamp
|
|
||||||
static volatile struct {
|
|
||||||
uint32_t index;
|
|
||||||
uint32_t tick;
|
|
||||||
} TraceTimestamp;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// Trace Helper functions
|
|
||||||
static void ClearTrace (void);
|
|
||||||
static void ResumeTrace (void);
|
|
||||||
static uint32_t GetTraceCount (void);
|
|
||||||
static uint8_t GetTraceStatus (void);
|
|
||||||
static void SetTraceError (uint8_t flag);
|
|
||||||
|
|
||||||
#if (SWO_STREAM != 0)
|
|
||||||
extern osThreadId_t SWO_ThreadId;
|
|
||||||
static volatile uint8_t TransferBusy = 0U; /* Transfer Busy Flag */
|
|
||||||
static uint32_t TransferSize; /* Current Transfer Size */
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#if (SWO_UART != 0)
|
|
||||||
|
|
||||||
// USART Driver Callback function
|
|
||||||
// event: event mask
|
|
||||||
static void USART_Callback (uint32_t event) {
|
|
||||||
uint32_t index_i;
|
|
||||||
uint32_t index_o;
|
|
||||||
uint32_t count;
|
|
||||||
uint32_t num;
|
|
||||||
|
|
||||||
if (event & ARM_USART_EVENT_RECEIVE_COMPLETE) {
|
|
||||||
#if (TIMESTAMP_CLOCK != 0U)
|
|
||||||
TraceTimestamp.tick = TIMESTAMP_GET();
|
|
||||||
#endif
|
|
||||||
index_o = TraceIndexO;
|
|
||||||
index_i = TraceIndexI;
|
|
||||||
index_i += TraceBlockSize;
|
|
||||||
TraceIndexI = index_i;
|
|
||||||
#if (TIMESTAMP_CLOCK != 0U)
|
|
||||||
TraceTimestamp.index = index_i;
|
|
||||||
#endif
|
|
||||||
num = TRACE_BLOCK_SIZE - (index_i & (TRACE_BLOCK_SIZE - 1U));
|
|
||||||
count = index_i - index_o;
|
|
||||||
if (count <= (SWO_BUFFER_SIZE - num)) {
|
|
||||||
index_i &= SWO_BUFFER_SIZE - 1U;
|
|
||||||
TraceBlockSize = num;
|
|
||||||
pUSART->Receive(&TraceBuf[index_i], num);
|
|
||||||
} else {
|
|
||||||
TraceStatus = DAP_SWO_CAPTURE_ACTIVE | DAP_SWO_CAPTURE_PAUSED;
|
|
||||||
}
|
|
||||||
TraceUpdate = 1U;
|
|
||||||
#if (SWO_STREAM != 0)
|
|
||||||
if (TraceTransport == 2U) {
|
|
||||||
if (count >= (USB_BLOCK_SIZE - (index_o & (USB_BLOCK_SIZE - 1U)))) {
|
|
||||||
osThreadFlagsSet(SWO_ThreadId, 1U);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
if (event & ARM_USART_EVENT_RX_OVERFLOW) {
|
|
||||||
SetTraceError(DAP_SWO_BUFFER_OVERRUN);
|
|
||||||
}
|
|
||||||
if (event & (ARM_USART_EVENT_RX_BREAK |
|
|
||||||
ARM_USART_EVENT_RX_FRAMING_ERROR |
|
|
||||||
ARM_USART_EVENT_RX_PARITY_ERROR)) {
|
|
||||||
SetTraceError(DAP_SWO_STREAM_ERROR);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enable or disable SWO Mode (UART)
|
|
||||||
// enable: enable flag
|
|
||||||
// return: 1 - Success, 0 - Error
|
|
||||||
__WEAK uint32_t SWO_Mode_UART (uint32_t enable) {
|
|
||||||
int32_t status;
|
|
||||||
|
|
||||||
USART_Ready = 0U;
|
|
||||||
|
|
||||||
if (enable != 0U) {
|
|
||||||
status = pUSART->Initialize(USART_Callback);
|
|
||||||
if (status != ARM_DRIVER_OK) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
status = pUSART->PowerControl(ARM_POWER_FULL);
|
|
||||||
if (status != ARM_DRIVER_OK) {
|
|
||||||
pUSART->Uninitialize();
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
pUSART->Control(ARM_USART_CONTROL_RX, 0U);
|
|
||||||
pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U);
|
|
||||||
pUSART->PowerControl(ARM_POWER_OFF);
|
|
||||||
pUSART->Uninitialize();
|
|
||||||
}
|
|
||||||
return (1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Configure SWO Baudrate (UART)
|
|
||||||
// baudrate: requested baudrate
|
|
||||||
// return: actual baudrate or 0 when not configured
|
|
||||||
__WEAK uint32_t SWO_Baudrate_UART (uint32_t baudrate) {
|
|
||||||
int32_t status;
|
|
||||||
uint32_t index;
|
|
||||||
uint32_t num;
|
|
||||||
|
|
||||||
if (baudrate > SWO_UART_MAX_BAUDRATE) {
|
|
||||||
baudrate = SWO_UART_MAX_BAUDRATE;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (TraceStatus & DAP_SWO_CAPTURE_ACTIVE) {
|
|
||||||
pUSART->Control(ARM_USART_CONTROL_RX, 0U);
|
|
||||||
if (pUSART->GetStatus().rx_busy) {
|
|
||||||
TraceIndexI += pUSART->GetRxCount();
|
|
||||||
pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
status = pUSART->Control(ARM_USART_MODE_ASYNCHRONOUS |
|
|
||||||
ARM_USART_DATA_BITS_8 |
|
|
||||||
ARM_USART_PARITY_NONE |
|
|
||||||
ARM_USART_STOP_BITS_1,
|
|
||||||
baudrate);
|
|
||||||
|
|
||||||
if (status == ARM_DRIVER_OK) {
|
|
||||||
USART_Ready = 1U;
|
|
||||||
} else {
|
|
||||||
USART_Ready = 0U;
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (TraceStatus & DAP_SWO_CAPTURE_ACTIVE) {
|
|
||||||
if ((TraceStatus & DAP_SWO_CAPTURE_PAUSED) == 0U) {
|
|
||||||
index = TraceIndexI & (SWO_BUFFER_SIZE - 1U);
|
|
||||||
num = TRACE_BLOCK_SIZE - (index & (TRACE_BLOCK_SIZE - 1U));
|
|
||||||
TraceBlockSize = num;
|
|
||||||
pUSART->Receive(&TraceBuf[index], num);
|
|
||||||
}
|
|
||||||
pUSART->Control(ARM_USART_CONTROL_RX, 1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
return (baudrate);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Control SWO Capture (UART)
|
|
||||||
// active: active flag
|
|
||||||
// return: 1 - Success, 0 - Error
|
|
||||||
__WEAK uint32_t SWO_Control_UART (uint32_t active) {
|
|
||||||
int32_t status;
|
|
||||||
|
|
||||||
if (active) {
|
|
||||||
if (!USART_Ready) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
TraceBlockSize = 1U;
|
|
||||||
status = pUSART->Receive(&TraceBuf[0], 1U);
|
|
||||||
if (status != ARM_DRIVER_OK) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
status = pUSART->Control(ARM_USART_CONTROL_RX, 1U);
|
|
||||||
if (status != ARM_DRIVER_OK) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
pUSART->Control(ARM_USART_CONTROL_RX, 0U);
|
|
||||||
if (pUSART->GetStatus().rx_busy) {
|
|
||||||
TraceIndexI += pUSART->GetRxCount();
|
|
||||||
pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start SWO Capture (UART)
|
|
||||||
// buf: pointer to buffer for capturing
|
|
||||||
// num: number of bytes to capture
|
|
||||||
__WEAK void SWO_Capture_UART (uint8_t *buf, uint32_t num) {
|
|
||||||
TraceBlockSize = num;
|
|
||||||
pUSART->Receive(buf, num);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get SWO Pending Trace Count (UART)
|
|
||||||
// return: number of pending trace data bytes
|
|
||||||
__WEAK uint32_t SWO_GetCount_UART (void) {
|
|
||||||
uint32_t count;
|
|
||||||
|
|
||||||
if (pUSART->GetStatus().rx_busy) {
|
|
||||||
count = pUSART->GetRxCount();
|
|
||||||
} else {
|
|
||||||
count = 0U;
|
|
||||||
}
|
|
||||||
return (count);
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* (SWO_UART != 0) */
|
|
||||||
|
|
||||||
|
|
||||||
#if (SWO_MANCHESTER != 0)
|
|
||||||
|
|
||||||
// Enable or disable SWO Mode (Manchester)
|
|
||||||
// enable: enable flag
|
|
||||||
// return: 1 - Success, 0 - Error
|
|
||||||
__WEAK uint32_t SWO_Mode_Manchester (uint32_t enable) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Configure SWO Baudrate (Manchester)
|
|
||||||
// baudrate: requested baudrate
|
|
||||||
// return: actual baudrate or 0 when not configured
|
|
||||||
__WEAK uint32_t SWO_Baudrate_Manchester (uint32_t baudrate) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Control SWO Capture (Manchester)
|
|
||||||
// active: active flag
|
|
||||||
// return: 1 - Success, 0 - Error
|
|
||||||
__WEAK uint32_t SWO_Control_Manchester (uint32_t active) {
|
|
||||||
return (0U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Start SWO Capture (Manchester)
|
|
||||||
// buf: pointer to buffer for capturing
|
|
||||||
// num: number of bytes to capture
|
|
||||||
__WEAK void SWO_Capture_Manchester (uint8_t *buf, uint32_t num) {
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get SWO Pending Trace Count (Manchester)
|
|
||||||
// return: number of pending trace data bytes
|
|
||||||
__WEAK uint32_t SWO_GetCount_Manchester (void) {
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* (SWO_MANCHESTER != 0) */
|
|
||||||
|
|
||||||
|
|
||||||
// Clear Trace Errors and Data
|
|
||||||
static void ClearTrace (void) {
|
|
||||||
|
|
||||||
#if (SWO_STREAM != 0)
|
|
||||||
if (TraceTransport == 2U) {
|
|
||||||
if (TransferBusy != 0U) {
|
|
||||||
SWO_AbortTransfer();
|
|
||||||
TransferBusy = 0U;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
TraceError[0] = 0U;
|
|
||||||
TraceError[1] = 0U;
|
|
||||||
TraceError_n = 0U;
|
|
||||||
TraceIndexI = 0U;
|
|
||||||
TraceIndexO = 0U;
|
|
||||||
|
|
||||||
#if (TIMESTAMP_CLOCK != 0U)
|
|
||||||
TraceTimestamp.index = 0U;
|
|
||||||
TraceTimestamp.tick = 0U;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resume Trace Capture
|
|
||||||
static void ResumeTrace (void) {
|
|
||||||
uint32_t index_i;
|
|
||||||
uint32_t index_o;
|
|
||||||
|
|
||||||
if (TraceStatus == (DAP_SWO_CAPTURE_ACTIVE | DAP_SWO_CAPTURE_PAUSED)) {
|
|
||||||
index_i = TraceIndexI;
|
|
||||||
index_o = TraceIndexO;
|
|
||||||
if ((index_i - index_o) < SWO_BUFFER_SIZE) {
|
|
||||||
index_i &= SWO_BUFFER_SIZE - 1U;
|
|
||||||
switch (TraceMode) {
|
|
||||||
#if (SWO_UART != 0)
|
|
||||||
case DAP_SWO_UART:
|
|
||||||
TraceStatus = DAP_SWO_CAPTURE_ACTIVE;
|
|
||||||
SWO_Capture_UART(&TraceBuf[index_i], 1U);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#if (SWO_MANCHESTER != 0)
|
|
||||||
case DAP_SWO_MANCHESTER:
|
|
||||||
TraceStatus = DAP_SWO_CAPTURE_ACTIVE;
|
|
||||||
SWO_Capture_Manchester(&TraceBuf[index_i], 1U);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get Trace Count
|
|
||||||
// return: number of available data bytes in trace buffer
|
|
||||||
static uint32_t GetTraceCount (void) {
|
|
||||||
uint32_t count;
|
|
||||||
|
|
||||||
if (TraceStatus == DAP_SWO_CAPTURE_ACTIVE) {
|
|
||||||
do {
|
|
||||||
TraceUpdate = 0U;
|
|
||||||
count = TraceIndexI - TraceIndexO;
|
|
||||||
switch (TraceMode) {
|
|
||||||
#if (SWO_UART != 0)
|
|
||||||
case DAP_SWO_UART:
|
|
||||||
count += SWO_GetCount_UART();
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#if (SWO_MANCHESTER != 0)
|
|
||||||
case DAP_SWO_MANCHESTER:
|
|
||||||
count += SWO_GetCount_Manchester();
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} while (TraceUpdate != 0U);
|
|
||||||
} else {
|
|
||||||
count = TraceIndexI - TraceIndexO;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (count);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get Trace Status (clear Error flags)
|
|
||||||
// return: Trace Status (Active flag and Error flags)
|
|
||||||
static uint8_t GetTraceStatus (void) {
|
|
||||||
uint8_t status;
|
|
||||||
uint32_t n;
|
|
||||||
|
|
||||||
n = TraceError_n;
|
|
||||||
TraceError_n ^= 1U;
|
|
||||||
status = TraceStatus | TraceError[n];
|
|
||||||
TraceError[n] = 0U;
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Set Trace Error flag(s)
|
|
||||||
// flag: error flag(s) to set
|
|
||||||
static void SetTraceError (uint8_t flag) {
|
|
||||||
TraceError[TraceError_n] |= flag;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Process SWO Transport command and prepare response
|
|
||||||
// request: pointer to request data
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t SWO_Transport (const uint8_t *request, uint8_t *response) {
|
|
||||||
uint8_t transport;
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
if ((TraceStatus & DAP_SWO_CAPTURE_ACTIVE) == 0U) {
|
|
||||||
transport = *request;
|
|
||||||
switch (transport) {
|
|
||||||
case 0U:
|
|
||||||
case 1U:
|
|
||||||
#if (SWO_STREAM != 0)
|
|
||||||
case 2U:
|
|
||||||
#endif
|
|
||||||
TraceTransport = transport;
|
|
||||||
result = 1U;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
result = 0U;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
result = 0U;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result != 0U) {
|
|
||||||
*response = DAP_OK;
|
|
||||||
} else {
|
|
||||||
*response = DAP_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ((1U << 16) | 1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Process SWO Mode command and prepare response
|
|
||||||
// request: pointer to request data
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t SWO_Mode (const uint8_t *request, uint8_t *response) {
|
|
||||||
uint8_t mode;
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
mode = *request;
|
|
||||||
|
|
||||||
switch (TraceMode) {
|
|
||||||
#if (SWO_UART != 0)
|
|
||||||
case DAP_SWO_UART:
|
|
||||||
SWO_Mode_UART(0U);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#if (SWO_MANCHESTER != 0)
|
|
||||||
case DAP_SWO_MANCHESTER:
|
|
||||||
SWO_Mode_Manchester(0U);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (mode) {
|
|
||||||
case DAP_SWO_OFF:
|
|
||||||
result = 1U;
|
|
||||||
break;
|
|
||||||
#if (SWO_UART != 0)
|
|
||||||
case DAP_SWO_UART:
|
|
||||||
result = SWO_Mode_UART(1U);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#if (SWO_MANCHESTER != 0)
|
|
||||||
case DAP_SWO_MANCHESTER:
|
|
||||||
result = SWO_Mode_Manchester(1U);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
result = 0U;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (result != 0U) {
|
|
||||||
TraceMode = mode;
|
|
||||||
} else {
|
|
||||||
TraceMode = DAP_SWO_OFF;
|
|
||||||
}
|
|
||||||
|
|
||||||
TraceStatus = 0U;
|
|
||||||
|
|
||||||
if (result != 0U) {
|
|
||||||
*response = DAP_OK;
|
|
||||||
} else {
|
|
||||||
*response = DAP_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ((1U << 16) | 1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Process SWO Baudrate command and prepare response
|
|
||||||
// request: pointer to request data
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t SWO_Baudrate (const uint8_t *request, uint8_t *response) {
|
|
||||||
uint32_t baudrate;
|
|
||||||
|
|
||||||
baudrate = (uint32_t)(*(request+0) << 0) |
|
|
||||||
(uint32_t)(*(request+1) << 8) |
|
|
||||||
(uint32_t)(*(request+2) << 16) |
|
|
||||||
(uint32_t)(*(request+3) << 24);
|
|
||||||
|
|
||||||
switch (TraceMode) {
|
|
||||||
#if (SWO_UART != 0)
|
|
||||||
case DAP_SWO_UART:
|
|
||||||
baudrate = SWO_Baudrate_UART(baudrate);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#if (SWO_MANCHESTER != 0)
|
|
||||||
case DAP_SWO_MANCHESTER:
|
|
||||||
baudrate = SWO_Baudrate_Manchester(baudrate);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
baudrate = 0U;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (baudrate == 0U) {
|
|
||||||
TraceStatus = 0U;
|
|
||||||
}
|
|
||||||
|
|
||||||
*response++ = (uint8_t)(baudrate >> 0);
|
|
||||||
*response++ = (uint8_t)(baudrate >> 8);
|
|
||||||
*response++ = (uint8_t)(baudrate >> 16);
|
|
||||||
*response = (uint8_t)(baudrate >> 24);
|
|
||||||
|
|
||||||
return ((4U << 16) | 4U);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Process SWO Control command and prepare response
|
|
||||||
// request: pointer to request data
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t SWO_Control (const uint8_t *request, uint8_t *response) {
|
|
||||||
uint8_t active;
|
|
||||||
uint32_t result;
|
|
||||||
|
|
||||||
active = *request & DAP_SWO_CAPTURE_ACTIVE;
|
|
||||||
|
|
||||||
if (active != (TraceStatus & DAP_SWO_CAPTURE_ACTIVE)) {
|
|
||||||
if (active) {
|
|
||||||
ClearTrace();
|
|
||||||
}
|
|
||||||
switch (TraceMode) {
|
|
||||||
#if (SWO_UART != 0)
|
|
||||||
case DAP_SWO_UART:
|
|
||||||
result = SWO_Control_UART(active);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
#if (SWO_MANCHESTER != 0)
|
|
||||||
case DAP_SWO_MANCHESTER:
|
|
||||||
result = SWO_Control_Manchester(active);
|
|
||||||
break;
|
|
||||||
#endif
|
|
||||||
default:
|
|
||||||
result = 0U;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (result != 0U) {
|
|
||||||
TraceStatus = active;
|
|
||||||
#if (SWO_STREAM != 0)
|
|
||||||
if (TraceTransport == 2U) {
|
|
||||||
osThreadFlagsSet(SWO_ThreadId, 1U);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
result = 1U;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (result != 0U) {
|
|
||||||
*response = DAP_OK;
|
|
||||||
} else {
|
|
||||||
*response = DAP_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ((1U << 16) | 1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Process SWO Status command and prepare response
//   Response layout: status byte, then the trace count as 4 bytes, least
//   significant byte first.
//   response: pointer to response data
//   return:   number of bytes in response
uint32_t SWO_Status (uint8_t *response) {
  /* Status is fetched before the count, as the two calls have side effects */
  const uint8_t  trace_status = GetTraceStatus();
  const uint32_t trace_count  = GetTraceCount();

  response[0] = trace_status;
  response[1] = (uint8_t)(trace_count >>  0);
  response[2] = (uint8_t)(trace_count >>  8);
  response[3] = (uint8_t)(trace_count >> 16);
  response[4] = (uint8_t)(trace_count >> 24);

  return (5U);
}
|
|
||||||
|
|
||||||
|
|
||||||
// Process SWO Extended Status command and prepare response
//   The request byte selects which fields are appended to the response:
//     bit 0: trace status (1 byte)
//     bit 1: trace count (4 bytes, least significant byte first)
//     bit 2: trace index and timestamp tick (4 + 4 bytes, least significant
//            byte first); only when TIMESTAMP_CLOCK is non-zero
//   request:  pointer to request data
//   response: pointer to response data
//   return:   number of bytes in response (lower 16 bits)
//             number of bytes in request (upper 16 bits)
uint32_t SWO_ExtendedStatus (const uint8_t *request, uint8_t *response) {
  uint8_t  cmd;
  uint8_t  status;
  uint32_t count;
#if (TIMESTAMP_CLOCK != 0U)
  uint32_t index;
  uint32_t tick;
#endif
  uint32_t num;

  num = 0U;
  cmd = *request;

  if (cmd & 0x01U) {
    status = GetTraceStatus();
    *response++ = status;
    num += 1U;
  }

  if (cmd & 0x02U) {
    count = GetTraceCount();
    *response++ = (uint8_t)(count >>  0);
    *response++ = (uint8_t)(count >>  8);
    *response++ = (uint8_t)(count >> 16);
    *response++ = (uint8_t)(count >> 24);
    num += 4U;
  }

#if (TIMESTAMP_CLOCK != 0U)
  if (cmd & 0x04U) {
    /* Retry until index and tick were read without an intervening update */
    do {
      TraceUpdate = 0U;
      index = TraceTimestamp.index;
      tick  = TraceTimestamp.tick;
    } while (TraceUpdate != 0U);
    *response++ = (uint8_t)(index >>  0);
    *response++ = (uint8_t)(index >>  8);
    *response++ = (uint8_t)(index >> 16);
    *response++ = (uint8_t)(index >> 24);
    *response++ = (uint8_t)(tick  >>  0);
    *response++ = (uint8_t)(tick  >>  8);
    *response++ = (uint8_t)(tick  >> 16);
    *response++ = (uint8_t)(tick  >> 24);
    /* Eight bytes were written above (index + tick), so report all eight;
       reporting four would cut the tick off the response. */
    num += 8U;
  }
#endif

  return ((1U << 16) | num);
}
|
|
||||||
|
|
||||||
|
|
||||||
// Process SWO Data command and prepare response
|
|
||||||
// request: pointer to request data
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t SWO_Data (const uint8_t *request, uint8_t *response) {
|
|
||||||
uint8_t status;
|
|
||||||
uint32_t count;
|
|
||||||
uint32_t index;
|
|
||||||
uint32_t n, i;
|
|
||||||
|
|
||||||
status = GetTraceStatus();
|
|
||||||
count = GetTraceCount();
|
|
||||||
|
|
||||||
if (TraceTransport == 1U) {
|
|
||||||
n = (uint32_t)(*(request+0) << 0) |
|
|
||||||
(uint32_t)(*(request+1) << 8);
|
|
||||||
if (n > (DAP_PACKET_SIZE - 4U)) {
|
|
||||||
n = DAP_PACKET_SIZE - 4U;
|
|
||||||
}
|
|
||||||
if (count > n) {
|
|
||||||
count = n;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
count = 0U;
|
|
||||||
}
|
|
||||||
|
|
||||||
*response++ = status;
|
|
||||||
*response++ = (uint8_t)(count >> 0);
|
|
||||||
*response++ = (uint8_t)(count >> 8);
|
|
||||||
|
|
||||||
if (TraceTransport == 1U) {
|
|
||||||
index = TraceIndexO;
|
|
||||||
for (i = index, n = count; n; n--) {
|
|
||||||
i &= SWO_BUFFER_SIZE - 1U;
|
|
||||||
*response++ = TraceBuf[i++];
|
|
||||||
}
|
|
||||||
TraceIndexO = index + count;
|
|
||||||
ResumeTrace();
|
|
||||||
}
|
|
||||||
|
|
||||||
return ((2U << 16) | (3U + count));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#if (SWO_STREAM != 0)
|
|
||||||
|
|
||||||
// SWO Data Transfer complete callback
|
|
||||||
void SWO_TransferComplete (void) {
|
|
||||||
TraceIndexO += TransferSize;
|
|
||||||
TransferBusy = 0U;
|
|
||||||
ResumeTrace();
|
|
||||||
osThreadFlagsSet(SWO_ThreadId, 1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// SWO Thread
|
|
||||||
__NO_RETURN void SWO_Thread (void *argument) {
|
|
||||||
uint32_t timeout;
|
|
||||||
uint32_t flags;
|
|
||||||
uint32_t count;
|
|
||||||
uint32_t index;
|
|
||||||
uint32_t i, n;
|
|
||||||
(void) argument;
|
|
||||||
|
|
||||||
timeout = osWaitForever;
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
flags = osThreadFlagsWait(1U, osFlagsWaitAny, timeout);
|
|
||||||
if (TraceStatus & DAP_SWO_CAPTURE_ACTIVE) {
|
|
||||||
timeout = SWO_STREAM_TIMEOUT;
|
|
||||||
} else {
|
|
||||||
timeout = osWaitForever;
|
|
||||||
flags = osFlagsErrorTimeout;
|
|
||||||
}
|
|
||||||
if (TransferBusy == 0U) {
|
|
||||||
count = GetTraceCount();
|
|
||||||
if (count != 0U) {
|
|
||||||
index = TraceIndexO & (SWO_BUFFER_SIZE - 1U);
|
|
||||||
n = SWO_BUFFER_SIZE - index;
|
|
||||||
if (count > n) {
|
|
||||||
count = n;
|
|
||||||
}
|
|
||||||
if (flags != osFlagsErrorTimeout) {
|
|
||||||
i = index & (USB_BLOCK_SIZE - 1U);
|
|
||||||
if (i == 0U) {
|
|
||||||
count &= ~(USB_BLOCK_SIZE - 1U);
|
|
||||||
} else {
|
|
||||||
n = USB_BLOCK_SIZE - i;
|
|
||||||
if (count >= n) {
|
|
||||||
count = n;
|
|
||||||
} else {
|
|
||||||
count = 0U;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (count != 0U) {
|
|
||||||
TransferSize = count;
|
|
||||||
TransferBusy = 1U;
|
|
||||||
SWO_QueueTransfer(&TraceBuf[index], count);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* (SWO_STREAM != 0) */
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* ((SWO_UART != 0) || (SWO_MANCHESTER != 0)) */
|
|
||||||
@ -1,286 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2013-2017 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
* ----------------------------------------------------------------------
|
|
||||||
*
|
|
||||||
* $Date: 1. December 2017
|
|
||||||
* $Revision: V2.0.0
|
|
||||||
*
|
|
||||||
* Project: CMSIS-DAP Source
|
|
||||||
* Title: SW_DP.c CMSIS-DAP SW DP I/O
|
|
||||||
*
|
|
||||||
*---------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
#include "DAP_config.h"
|
|
||||||
#include "DAP.h"
|
|
||||||
|
|
||||||
|
|
||||||
// SW Macros
//   The multi-statement macros are wrapped in do { } while (0) so that each
//   expands to exactly one statement and stays correct inside an unbraced
//   if/else or loop body.

#define PIN_SWCLK_SET PIN_SWCLK_TCK_SET
#define PIN_SWCLK_CLR PIN_SWCLK_TCK_CLR

// Generate one SWCLK cycle: clock low, delay, clock high, delay
#define SW_CLOCK_CYCLE()                \
  do {                                  \
    PIN_SWCLK_CLR();                    \
    PIN_DELAY();                        \
    PIN_SWCLK_SET();                    \
    PIN_DELAY();                        \
  } while (0)

// Drive SWDIO with 'bit' (passed to PIN_SWDIO_OUT), then generate one SWCLK cycle
#define SW_WRITE_BIT(bit)               \
  do {                                  \
    PIN_SWDIO_OUT(bit);                 \
    PIN_SWCLK_CLR();                    \
    PIN_DELAY();                        \
    PIN_SWCLK_SET();                    \
    PIN_DELAY();                        \
  } while (0)

// Generate one SWCLK cycle and store the SWDIO level, sampled while the
// clock is low (after the first delay), into the lvalue 'bit'
#define SW_READ_BIT(bit)                \
  do {                                  \
    PIN_SWCLK_CLR();                    \
    PIN_DELAY();                        \
    (bit) = PIN_SWDIO_IN();             \
    PIN_SWCLK_SET();                    \
    PIN_DELAY();                        \
  } while (0)

// Default delay used by the macros above (slow clock)
#define PIN_DELAY() PIN_DELAY_SLOW(DAP_Data.clock_delay)
|
|
||||||
|
|
||||||
|
|
||||||
// Generate SWJ Sequence
//   count:  number of bits to clock out on SWDIO/TMS
//   data:   pointer to sequence bit data; a new byte is fetched every 8 bits
//           and shifted out least significant bit first
//   return: none
#if ((DAP_SWD != 0) || (DAP_JTAG != 0))
void SWJ_Sequence (uint32_t count, const uint8_t *data) {
  uint32_t shift_reg = 0U;              // bits of the current byte still to send
  uint32_t bits_left = 0U;              // number of valid bits in shift_reg

  for (; count != 0U; count--) {
    if (bits_left == 0U) {
      shift_reg = *data++;
      bits_left = 8U;
    }
    if ((shift_reg & 1U) != 0U) {
      PIN_SWDIO_TMS_SET();
    } else {
      PIN_SWDIO_TMS_CLR();
    }
    SW_CLOCK_CYCLE();
    shift_reg >>= 1;
    bits_left--;
  }
}
#endif
|
|
||||||
|
|
||||||
|
|
||||||
// Generate SWD Sequence
//   info:   sequence information: the SWD_SEQUENCE_CLK field is the number of
//           clock cycles (0 selects 64), SWD_SEQUENCE_DIN selects input mode
//   swdo:   pointer to SWDIO generated data (read in output mode)
//   swdi:   pointer to SWDIO captured data (written in input mode)
//   return: none
#if (DAP_SWD != 0)
void SWD_Sequence (uint32_t info, const uint8_t *swdo, uint8_t *swdi) {
  uint32_t clocks;                      // clock cycles still to generate
  uint32_t slots;                       // bit positions left in current byte
  uint32_t shift;                       // byte being assembled / shifted out
  uint32_t bit;

  clocks = info & SWD_SEQUENCE_CLK;
  if (clocks == 0U) {
    clocks = 64U;
  }

  if ((info & SWD_SEQUENCE_DIN) != 0U) {
    // Input: capture bits LSB first, one byte per (up to) 8 clocks
    while (clocks != 0U) {
      shift = 0U;
      slots = 8U;
      while ((slots != 0U) && (clocks != 0U)) {
        SW_READ_BIT(bit);
        shift >>= 1;
        shift  |= bit << 7;
        slots--;
        clocks--;
      }
      // Right-align a final partial byte
      shift >>= slots;
      *swdi++ = (uint8_t)shift;
    }
  } else {
    // Output: send bits LSB first, one byte per (up to) 8 clocks
    while (clocks != 0U) {
      shift = *swdo++;
      slots = 8U;
      while ((slots != 0U) && (clocks != 0U)) {
        SW_WRITE_BIT(shift);
        shift >>= 1;
        slots--;
        clocks--;
      }
    }
  }
}
#endif
|
|
||||||
|
|
||||||
|
|
||||||
#if (DAP_SWD != 0)
|
|
||||||
|
|
||||||
|
|
||||||
// SWD Transfer I/O
|
|
||||||
// request: A[3:2] RnW APnDP
|
|
||||||
// data: DATA[31:0]
|
|
||||||
// return: ACK[2:0]
|
|
||||||
#define SWD_TransferFunction(speed) /**/ \
|
|
||||||
static uint8_t SWD_Transfer##speed (uint32_t request, uint32_t *data) { \
|
|
||||||
uint32_t ack; \
|
|
||||||
uint32_t bit; \
|
|
||||||
uint32_t val; \
|
|
||||||
uint32_t parity; \
|
|
||||||
\
|
|
||||||
uint32_t n; \
|
|
||||||
\
|
|
||||||
/* Packet Request */ \
|
|
||||||
parity = 0U; \
|
|
||||||
SW_WRITE_BIT(1U); /* Start Bit */ \
|
|
||||||
bit = request >> 0; \
|
|
||||||
SW_WRITE_BIT(bit); /* APnDP Bit */ \
|
|
||||||
parity += bit; \
|
|
||||||
bit = request >> 1; \
|
|
||||||
SW_WRITE_BIT(bit); /* RnW Bit */ \
|
|
||||||
parity += bit; \
|
|
||||||
bit = request >> 2; \
|
|
||||||
SW_WRITE_BIT(bit); /* A2 Bit */ \
|
|
||||||
parity += bit; \
|
|
||||||
bit = request >> 3; \
|
|
||||||
SW_WRITE_BIT(bit); /* A3 Bit */ \
|
|
||||||
parity += bit; \
|
|
||||||
SW_WRITE_BIT(parity); /* Parity Bit */ \
|
|
||||||
SW_WRITE_BIT(0U); /* Stop Bit */ \
|
|
||||||
SW_WRITE_BIT(1U); /* Park Bit */ \
|
|
||||||
\
|
|
||||||
/* Turnaround */ \
|
|
||||||
PIN_SWDIO_OUT_DISABLE(); \
|
|
||||||
for (n = DAP_Data.swd_conf.turnaround; n; n--) { \
|
|
||||||
SW_CLOCK_CYCLE(); \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
/* Acknowledge response */ \
|
|
||||||
SW_READ_BIT(bit); \
|
|
||||||
ack = bit << 0; \
|
|
||||||
SW_READ_BIT(bit); \
|
|
||||||
ack |= bit << 1; \
|
|
||||||
SW_READ_BIT(bit); \
|
|
||||||
ack |= bit << 2; \
|
|
||||||
\
|
|
||||||
if (ack == DAP_TRANSFER_OK) { /* OK response */ \
|
|
||||||
/* Data transfer */ \
|
|
||||||
if (request & DAP_TRANSFER_RnW) { \
|
|
||||||
/* Read data */ \
|
|
||||||
val = 0U; \
|
|
||||||
parity = 0U; \
|
|
||||||
for (n = 32U; n; n--) { \
|
|
||||||
SW_READ_BIT(bit); /* Read RDATA[0:31] */ \
|
|
||||||
parity += bit; \
|
|
||||||
val >>= 1; \
|
|
||||||
val |= bit << 31; \
|
|
||||||
} \
|
|
||||||
SW_READ_BIT(bit); /* Read Parity */ \
|
|
||||||
if ((parity ^ bit) & 1U) { \
|
|
||||||
ack = DAP_TRANSFER_ERROR; \
|
|
||||||
} \
|
|
||||||
if (data) { *data = val; } \
|
|
||||||
/* Turnaround */ \
|
|
||||||
for (n = DAP_Data.swd_conf.turnaround; n; n--) { \
|
|
||||||
SW_CLOCK_CYCLE(); \
|
|
||||||
} \
|
|
||||||
PIN_SWDIO_OUT_ENABLE(); \
|
|
||||||
} else { \
|
|
||||||
/* Turnaround */ \
|
|
||||||
for (n = DAP_Data.swd_conf.turnaround; n; n--) { \
|
|
||||||
SW_CLOCK_CYCLE(); \
|
|
||||||
} \
|
|
||||||
PIN_SWDIO_OUT_ENABLE(); \
|
|
||||||
/* Write data */ \
|
|
||||||
val = *data; \
|
|
||||||
parity = 0U; \
|
|
||||||
for (n = 32U; n; n--) { \
|
|
||||||
SW_WRITE_BIT(val); /* Write WDATA[0:31] */ \
|
|
||||||
parity += val; \
|
|
||||||
val >>= 1; \
|
|
||||||
} \
|
|
||||||
SW_WRITE_BIT(parity); /* Write Parity Bit */ \
|
|
||||||
} \
|
|
||||||
/* Capture Timestamp */ \
|
|
||||||
if (request & DAP_TRANSFER_TIMESTAMP) { \
|
|
||||||
DAP_Data.timestamp = TIMESTAMP_GET(); \
|
|
||||||
} \
|
|
||||||
/* Idle cycles */ \
|
|
||||||
n = DAP_Data.transfer.idle_cycles; \
|
|
||||||
if (n) { \
|
|
||||||
PIN_SWDIO_OUT(0U); \
|
|
||||||
for (; n; n--) { \
|
|
||||||
SW_CLOCK_CYCLE(); \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
PIN_SWDIO_OUT(1U); \
|
|
||||||
return ((uint8_t)ack); \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
if ((ack == DAP_TRANSFER_WAIT) || (ack == DAP_TRANSFER_FAULT)) { \
|
|
||||||
/* WAIT or FAULT response */ \
|
|
||||||
if (DAP_Data.swd_conf.data_phase && ((request & DAP_TRANSFER_RnW) != 0U)) { \
|
|
||||||
for (n = 32U+1U; n; n--) { \
|
|
||||||
SW_CLOCK_CYCLE(); /* Dummy Read RDATA[0:31] + Parity */ \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
/* Turnaround */ \
|
|
||||||
for (n = DAP_Data.swd_conf.turnaround; n; n--) { \
|
|
||||||
SW_CLOCK_CYCLE(); \
|
|
||||||
} \
|
|
||||||
PIN_SWDIO_OUT_ENABLE(); \
|
|
||||||
if (DAP_Data.swd_conf.data_phase && ((request & DAP_TRANSFER_RnW) == 0U)) { \
|
|
||||||
PIN_SWDIO_OUT(0U); \
|
|
||||||
for (n = 32U+1U; n; n--) { \
|
|
||||||
SW_CLOCK_CYCLE(); /* Dummy Write WDATA[0:31] + Parity */ \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
PIN_SWDIO_OUT(1U); \
|
|
||||||
return ((uint8_t)ack); \
|
|
||||||
} \
|
|
||||||
\
|
|
||||||
/* Protocol error */ \
|
|
||||||
for (n = DAP_Data.swd_conf.turnaround + 32U + 1U; n; n--) { \
|
|
||||||
SW_CLOCK_CYCLE(); /* Back off data phase */ \
|
|
||||||
} \
|
|
||||||
PIN_SWDIO_OUT_ENABLE(); \
|
|
||||||
PIN_SWDIO_OUT(1U); \
|
|
||||||
return ((uint8_t)ack); \
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#undef PIN_DELAY
|
|
||||||
#define PIN_DELAY() PIN_DELAY_FAST()
|
|
||||||
SWD_TransferFunction(Fast)
|
|
||||||
|
|
||||||
#undef PIN_DELAY
|
|
||||||
#define PIN_DELAY() PIN_DELAY_SLOW(DAP_Data.clock_delay)
|
|
||||||
SWD_TransferFunction(Slow)
|
|
||||||
|
|
||||||
|
|
||||||
// SWD Transfer I/O
|
|
||||||
// request: A[3:2] RnW APnDP
|
|
||||||
// data: DATA[31:0]
|
|
||||||
// return: ACK[2:0]
|
|
||||||
uint8_t SWD_Transfer(uint32_t request, uint32_t *data) {
|
|
||||||
if (DAP_Data.fast_clock) {
|
|
||||||
return SWD_TransferFast(request, data);
|
|
||||||
} else {
|
|
||||||
return SWD_TransferSlow(request, data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
#endif /* (DAP_SWD != 0) */
|
|
||||||
@ -1,652 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2021 ARM Limited. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*
|
|
||||||
* ----------------------------------------------------------------------
|
|
||||||
*
|
|
||||||
* $Date: 1. March 2021
|
|
||||||
* $Revision: V1.0.0
|
|
||||||
*
|
|
||||||
* Project: CMSIS-DAP Source
|
|
||||||
* Title: UART.c CMSIS-DAP UART
|
|
||||||
*
|
|
||||||
*---------------------------------------------------------------------------*/
|
|
||||||
|
|
||||||
#include "DAP_config.h"
|
|
||||||
#include "DAP.h"
|
|
||||||
|
|
||||||
#if (DAP_UART != 0)
|
|
||||||
|
|
||||||
#ifdef DAP_FW_V1
|
|
||||||
#error "UART Communication Port not supported in DAP V1!"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "Driver_USART.h"
|
|
||||||
|
|
||||||
#include "cmsis_os2.h"
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
#define UART_RX_BLOCK_SIZE 32U /* Uart Rx Block Size (must be 2^n) */
|
|
||||||
|
|
||||||
// USART Driver
|
|
||||||
#define _USART_Driver_(n) Driver_USART##n
|
|
||||||
#define USART_Driver_(n) _USART_Driver_(n)
|
|
||||||
extern ARM_DRIVER_USART USART_Driver_(DAP_UART_DRIVER);
|
|
||||||
#define pUSART (&USART_Driver_(DAP_UART_DRIVER))
|
|
||||||
|
|
||||||
// UART Configuration
|
|
||||||
#if (DAP_UART_USB_COM_PORT != 0)
|
|
||||||
static uint8_t UartTransport = DAP_UART_TRANSPORT_USB_COM_PORT;
|
|
||||||
#else
|
|
||||||
static uint8_t UartTransport = DAP_UART_TRANSPORT_NONE;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// UART Flags
|
|
||||||
static uint8_t UartConfigured = 0U;
|
|
||||||
static uint8_t UartReceiveEnabled = 0U;
|
|
||||||
static uint8_t UartTransmitEnabled = 0U;
|
|
||||||
static uint8_t UartTransmitActive = 0U;
|
|
||||||
|
|
||||||
// UART TX Buffer
|
|
||||||
static uint8_t UartTxBuf[DAP_UART_TX_BUFFER_SIZE];
|
|
||||||
static volatile uint32_t UartTxIndexI = 0U;
|
|
||||||
static volatile uint32_t UartTxIndexO = 0U;
|
|
||||||
|
|
||||||
// UART RX Buffer
|
|
||||||
static uint8_t UartRxBuf[DAP_UART_RX_BUFFER_SIZE];
|
|
||||||
static volatile uint32_t UartRxIndexI = 0U;
|
|
||||||
static volatile uint32_t UartRxIndexO = 0U;
|
|
||||||
|
|
||||||
// Uart Errors
|
|
||||||
static volatile uint8_t UartErrorRxDataLost = 0U;
|
|
||||||
static volatile uint8_t UartErrorFraming = 0U;
|
|
||||||
static volatile uint8_t UartErrorParity = 0U;
|
|
||||||
|
|
||||||
// UART Transmit
|
|
||||||
static uint32_t UartTxNum = 0U;
|
|
||||||
|
|
||||||
// Function prototypes
|
|
||||||
static uint8_t UART_Init (void);
|
|
||||||
static void UART_Uninit (void);
|
|
||||||
static uint8_t UART_Get_Status (void);
|
|
||||||
static uint8_t UART_Receive_Enable (void);
|
|
||||||
static uint8_t UART_Transmit_Enable (void);
|
|
||||||
static void UART_Receive_Disable (void);
|
|
||||||
static void UART_Transmit_Disable (void);
|
|
||||||
static void UART_Receive_Flush (void);
|
|
||||||
static void UART_Transmit_Flush (void);
|
|
||||||
static void UART_Receive (void);
|
|
||||||
static void UART_Transmit (void);
|
|
||||||
|
|
||||||
|
|
||||||
// USART Driver Callback function
|
|
||||||
// event: event mask
|
|
||||||
static void USART_Callback (uint32_t event) {
|
|
||||||
if (event & ARM_USART_EVENT_SEND_COMPLETE) {
|
|
||||||
UartTxIndexO += UartTxNum;
|
|
||||||
UartTransmitActive = 0U;
|
|
||||||
UART_Transmit();
|
|
||||||
}
|
|
||||||
if (event & ARM_USART_EVENT_RECEIVE_COMPLETE) {
|
|
||||||
UartRxIndexI += UART_RX_BLOCK_SIZE;
|
|
||||||
UART_Receive();
|
|
||||||
}
|
|
||||||
if (event & ARM_USART_EVENT_RX_OVERFLOW) {
|
|
||||||
UartErrorRxDataLost = 1U;
|
|
||||||
}
|
|
||||||
if (event & ARM_USART_EVENT_RX_FRAMING_ERROR) {
|
|
||||||
UartErrorFraming = 1U;
|
|
||||||
}
|
|
||||||
if (event & ARM_USART_EVENT_RX_PARITY_ERROR) {
|
|
||||||
UartErrorParity = 1U;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Init UART
|
|
||||||
// return: DAP_OK or DAP_ERROR
|
|
||||||
static uint8_t UART_Init (void) {
|
|
||||||
int32_t status;
|
|
||||||
uint8_t ret = DAP_ERROR;
|
|
||||||
|
|
||||||
UartConfigured = 0U;
|
|
||||||
UartReceiveEnabled = 0U;
|
|
||||||
UartTransmitEnabled = 0U;
|
|
||||||
UartTransmitActive = 0U;
|
|
||||||
UartErrorRxDataLost = 0U;
|
|
||||||
UartErrorFraming = 0U;
|
|
||||||
UartErrorParity = 0U;
|
|
||||||
UartTxIndexI = 0U;
|
|
||||||
UartTxIndexO = 0U;
|
|
||||||
UartRxIndexI = 0U;
|
|
||||||
UartRxIndexO = 0U;
|
|
||||||
UartTxNum = 0U;
|
|
||||||
|
|
||||||
status = pUSART->Initialize(USART_Callback);
|
|
||||||
if (status == ARM_DRIVER_OK) {
|
|
||||||
status = pUSART->PowerControl(ARM_POWER_FULL);
|
|
||||||
}
|
|
||||||
if (status == ARM_DRIVER_OK) {
|
|
||||||
ret = DAP_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Un-Init UART
|
|
||||||
static void UART_Uninit (void) {
|
|
||||||
UartConfigured = 0U;
|
|
||||||
|
|
||||||
pUSART->PowerControl(ARM_POWER_OFF);
|
|
||||||
pUSART->Uninitialize();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get UART Status
|
|
||||||
// return: status
|
|
||||||
static uint8_t UART_Get_Status (void) {
|
|
||||||
uint8_t status = 0U;
|
|
||||||
|
|
||||||
if (UartReceiveEnabled != 0U) {
|
|
||||||
status |= DAP_UART_STATUS_RX_ENABLED;
|
|
||||||
}
|
|
||||||
if (UartErrorRxDataLost != 0U) {
|
|
||||||
UartErrorRxDataLost = 0U;
|
|
||||||
status |= DAP_UART_STATUS_RX_DATA_LOST;
|
|
||||||
}
|
|
||||||
if (UartErrorFraming != 0U) {
|
|
||||||
UartErrorFraming = 0U;
|
|
||||||
status |= DAP_UART_STATUS_FRAMING_ERROR;
|
|
||||||
}
|
|
||||||
if (UartErrorParity != 0U) {
|
|
||||||
UartErrorParity = 0U;
|
|
||||||
status |= DAP_UART_STATUS_PARITY_ERROR;
|
|
||||||
}
|
|
||||||
if (UartTransmitEnabled != 0U) {
|
|
||||||
status |= DAP_UART_STATUS_TX_ENABLED;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (status);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enable UART Receive
|
|
||||||
// return: DAP_OK or DAP_ERROR
|
|
||||||
static uint8_t UART_Receive_Enable (void) {
|
|
||||||
int32_t status;
|
|
||||||
uint8_t ret = DAP_ERROR;
|
|
||||||
|
|
||||||
if (UartReceiveEnabled == 0U) {
|
|
||||||
// Flush Buffers
|
|
||||||
UartRxIndexI = 0U;
|
|
||||||
UartRxIndexO = 0U;
|
|
||||||
|
|
||||||
UART_Receive();
|
|
||||||
status = pUSART->Control(ARM_USART_CONTROL_RX, 1U);
|
|
||||||
if (status == ARM_DRIVER_OK) {
|
|
||||||
UartReceiveEnabled = 1U;
|
|
||||||
ret = DAP_OK;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ret = DAP_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Enable UART Transmit
|
|
||||||
// return: DAP_OK or DAP_ERROR
|
|
||||||
static uint8_t UART_Transmit_Enable (void) {
|
|
||||||
int32_t status;
|
|
||||||
uint8_t ret = DAP_ERROR;
|
|
||||||
|
|
||||||
if (UartTransmitEnabled == 0U) {
|
|
||||||
// Flush Buffers
|
|
||||||
UartTransmitActive = 0U;
|
|
||||||
UartTxIndexI = 0U;
|
|
||||||
UartTxIndexO = 0U;
|
|
||||||
UartTxNum = 0U;
|
|
||||||
|
|
||||||
status = pUSART->Control(ARM_USART_CONTROL_TX, 1U);
|
|
||||||
if (status == ARM_DRIVER_OK) {
|
|
||||||
UartTransmitEnabled = 1U;
|
|
||||||
ret = DAP_OK;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ret = DAP_OK;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (ret);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Disable UART Receive
|
|
||||||
static void UART_Receive_Disable (void) {
|
|
||||||
if (UartReceiveEnabled != 0U) {
|
|
||||||
pUSART->Control(ARM_USART_CONTROL_RX, 0U);
|
|
||||||
pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U);
|
|
||||||
UartReceiveEnabled = 0U;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Disable UART Transmit
|
|
||||||
static void UART_Transmit_Disable (void) {
|
|
||||||
if (UartTransmitEnabled != 0U) {
|
|
||||||
pUSART->Control(ARM_USART_ABORT_SEND, 0U);
|
|
||||||
pUSART->Control(ARM_USART_CONTROL_TX, 0U);
|
|
||||||
UartTransmitActive = 0U;
|
|
||||||
UartTransmitEnabled = 0U;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush UART Receive buffer
|
|
||||||
static void UART_Receive_Flush (void) {
|
|
||||||
pUSART->Control(ARM_USART_ABORT_RECEIVE, 0U);
|
|
||||||
UartRxIndexI = 0U;
|
|
||||||
UartRxIndexO = 0U;
|
|
||||||
if (UartReceiveEnabled != 0U) {
|
|
||||||
UART_Receive();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Flush UART Transmit buffer
|
|
||||||
static void UART_Transmit_Flush (void) {
|
|
||||||
pUSART->Control(ARM_USART_ABORT_SEND, 0U);
|
|
||||||
UartTransmitActive = 0U;
|
|
||||||
UartTxIndexI = 0U;
|
|
||||||
UartTxIndexO = 0U;
|
|
||||||
UartTxNum = 0U;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Receive data from target via UART
|
|
||||||
static void UART_Receive (void) {
|
|
||||||
uint32_t index;
|
|
||||||
|
|
||||||
index = UartRxIndexI & (DAP_UART_RX_BUFFER_SIZE - 1U);
|
|
||||||
pUSART->Receive(&UartRxBuf[index], UART_RX_BLOCK_SIZE);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Transmit available data to target via UART
|
|
||||||
static void UART_Transmit (void) {
|
|
||||||
uint32_t count;
|
|
||||||
uint32_t index;
|
|
||||||
|
|
||||||
count = UartTxIndexI - UartTxIndexO;
|
|
||||||
index = UartTxIndexO & (DAP_UART_TX_BUFFER_SIZE - 1U);
|
|
||||||
|
|
||||||
if (count != 0U) {
|
|
||||||
if ((index + count) <= DAP_UART_TX_BUFFER_SIZE) {
|
|
||||||
UartTxNum = count;
|
|
||||||
} else {
|
|
||||||
UartTxNum = DAP_UART_TX_BUFFER_SIZE - index;
|
|
||||||
}
|
|
||||||
UartTransmitActive = 1U;
|
|
||||||
pUSART->Send(&UartTxBuf[index], UartTxNum);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process UART Transport command and prepare response
|
|
||||||
// request: pointer to request data
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t UART_Transport (const uint8_t *request, uint8_t *response) {
|
|
||||||
uint8_t transport;
|
|
||||||
uint8_t ret = DAP_ERROR;
|
|
||||||
|
|
||||||
transport = *request;
|
|
||||||
switch (transport) {
|
|
||||||
case DAP_UART_TRANSPORT_NONE:
|
|
||||||
switch (UartTransport) {
|
|
||||||
case DAP_UART_TRANSPORT_NONE:
|
|
||||||
ret = DAP_OK;
|
|
||||||
break;
|
|
||||||
case DAP_UART_TRANSPORT_USB_COM_PORT:
|
|
||||||
#if (DAP_UART_USB_COM_PORT != 0)
|
|
||||||
USB_COM_PORT_Activate(0U);
|
|
||||||
UartTransport = DAP_UART_TRANSPORT_NONE;
|
|
||||||
ret = DAP_OK;
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
case DAP_UART_TRANSPORT_DAP_COMMAND:
|
|
||||||
UART_Receive_Disable();
|
|
||||||
UART_Transmit_Disable();
|
|
||||||
UART_Uninit();
|
|
||||||
UartTransport = DAP_UART_TRANSPORT_NONE;
|
|
||||||
ret= DAP_OK;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DAP_UART_TRANSPORT_USB_COM_PORT:
|
|
||||||
switch (UartTransport) {
|
|
||||||
case DAP_UART_TRANSPORT_NONE:
|
|
||||||
#if (DAP_UART_USB_COM_PORT != 0)
|
|
||||||
if (USB_COM_PORT_Activate(1U) == 0U) {
|
|
||||||
UartTransport = DAP_UART_TRANSPORT_USB_COM_PORT;
|
|
||||||
ret = DAP_OK;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
case DAP_UART_TRANSPORT_USB_COM_PORT:
|
|
||||||
ret = DAP_OK;
|
|
||||||
break;
|
|
||||||
case DAP_UART_TRANSPORT_DAP_COMMAND:
|
|
||||||
UART_Receive_Disable();
|
|
||||||
UART_Transmit_Disable();
|
|
||||||
UART_Uninit();
|
|
||||||
UartTransport = DAP_UART_TRANSPORT_NONE;
|
|
||||||
#if (DAP_UART_USB_COM_PORT != 0)
|
|
||||||
if (USB_COM_PORT_Activate(1U) == 0U) {
|
|
||||||
UartTransport = DAP_UART_TRANSPORT_USB_COM_PORT;
|
|
||||||
ret = DAP_OK;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DAP_UART_TRANSPORT_DAP_COMMAND:
|
|
||||||
switch (UartTransport) {
|
|
||||||
case DAP_UART_TRANSPORT_NONE:
|
|
||||||
ret = UART_Init();
|
|
||||||
if (ret == DAP_OK) {
|
|
||||||
UartTransport = DAP_UART_TRANSPORT_DAP_COMMAND;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DAP_UART_TRANSPORT_USB_COM_PORT:
|
|
||||||
#if (DAP_UART_USB_COM_PORT != 0)
|
|
||||||
USB_COM_PORT_Activate(0U);
|
|
||||||
UartTransport = DAP_UART_TRANSPORT_NONE;
|
|
||||||
#endif
|
|
||||||
ret = UART_Init();
|
|
||||||
if (ret == DAP_OK) {
|
|
||||||
UartTransport = DAP_UART_TRANSPORT_DAP_COMMAND;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
case DAP_UART_TRANSPORT_DAP_COMMAND:
|
|
||||||
ret = DAP_OK;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
*response = ret;
|
|
||||||
|
|
||||||
return ((1U << 16) | 1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process UART Configure command and prepare response
|
|
||||||
// request: pointer to request data
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t UART_Configure (const uint8_t *request, uint8_t *response) {
|
|
||||||
uint8_t control, status;
|
|
||||||
uint32_t baudrate;
|
|
||||||
int32_t result;
|
|
||||||
|
|
||||||
if (UartTransport != DAP_UART_TRANSPORT_DAP_COMMAND) {
|
|
||||||
status = DAP_UART_CFG_ERROR_DATA_BITS |
|
|
||||||
DAP_UART_CFG_ERROR_PARITY |
|
|
||||||
DAP_UART_CFG_ERROR_STOP_BITS;
|
|
||||||
baudrate = 0U; // baudrate error
|
|
||||||
} else {
|
|
||||||
|
|
||||||
status = 0U;
|
|
||||||
control = *request;
|
|
||||||
baudrate = (uint32_t)(*(request+1) << 0) |
|
|
||||||
(uint32_t)(*(request+2) << 8) |
|
|
||||||
(uint32_t)(*(request+3) << 16) |
|
|
||||||
(uint32_t)(*(request+4) << 24);
|
|
||||||
|
|
||||||
result = pUSART->Control(control |
|
|
||||||
ARM_USART_MODE_ASYNCHRONOUS |
|
|
||||||
ARM_USART_FLOW_CONTROL_NONE,
|
|
||||||
baudrate);
|
|
||||||
if (result == ARM_DRIVER_OK) {
|
|
||||||
UartConfigured = 1U;
|
|
||||||
} else {
|
|
||||||
UartConfigured = 0U;
|
|
||||||
switch (result) {
|
|
||||||
case ARM_USART_ERROR_BAUDRATE:
|
|
||||||
status = 0U;
|
|
||||||
baudrate = 0U;
|
|
||||||
break;
|
|
||||||
case ARM_USART_ERROR_DATA_BITS:
|
|
||||||
status = DAP_UART_CFG_ERROR_DATA_BITS;
|
|
||||||
break;
|
|
||||||
case ARM_USART_ERROR_PARITY:
|
|
||||||
status = DAP_UART_CFG_ERROR_PARITY;
|
|
||||||
break;
|
|
||||||
case ARM_USART_ERROR_STOP_BITS:
|
|
||||||
status = DAP_UART_CFG_ERROR_STOP_BITS;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
status = DAP_UART_CFG_ERROR_DATA_BITS |
|
|
||||||
DAP_UART_CFG_ERROR_PARITY |
|
|
||||||
DAP_UART_CFG_ERROR_STOP_BITS;
|
|
||||||
baudrate = 0U;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*response++ = status;
|
|
||||||
*response++ = (uint8_t)(baudrate >> 0);
|
|
||||||
*response++ = (uint8_t)(baudrate >> 8);
|
|
||||||
*response++ = (uint8_t)(baudrate >> 16);
|
|
||||||
*response = (uint8_t)(baudrate >> 24);
|
|
||||||
|
|
||||||
return ((5U << 16) | 5U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process UART Control command and prepare response
|
|
||||||
// request: pointer to request data
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t UART_Control (const uint8_t *request, uint8_t *response) {
|
|
||||||
uint8_t control;
|
|
||||||
uint8_t result;
|
|
||||||
uint8_t ret = DAP_OK;
|
|
||||||
|
|
||||||
if (UartTransport != DAP_UART_TRANSPORT_DAP_COMMAND) {
|
|
||||||
ret = DAP_ERROR;
|
|
||||||
} else {
|
|
||||||
|
|
||||||
control = *request;
|
|
||||||
|
|
||||||
if ((control & DAP_UART_CONTROL_RX_DISABLE) != 0U) {
|
|
||||||
// Receive disable
|
|
||||||
UART_Receive_Disable();
|
|
||||||
} else if ((control & DAP_UART_CONTROL_RX_ENABLE) != 0U) {
|
|
||||||
// Receive enable
|
|
||||||
if (UartConfigured != 0U) {
|
|
||||||
result = UART_Receive_Enable();
|
|
||||||
if (result != DAP_OK) {
|
|
||||||
ret = DAP_ERROR;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ret = DAP_ERROR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((control & DAP_UART_CONTROL_RX_BUF_FLUSH) != 0U) {
|
|
||||||
UART_Receive_Flush();
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((control & DAP_UART_CONTROL_TX_DISABLE) != 0U) {
|
|
||||||
// Transmit disable
|
|
||||||
UART_Transmit_Disable();
|
|
||||||
} else if ((control & DAP_UART_CONTROL_TX_ENABLE) != 0U) {
|
|
||||||
// Transmit enable
|
|
||||||
if (UartConfigured != 0U) {
|
|
||||||
result = UART_Transmit_Enable();
|
|
||||||
if (result != DAP_OK) {
|
|
||||||
ret = DAP_ERROR;
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
ret = DAP_ERROR;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ((control & DAP_UART_CONTROL_TX_BUF_FLUSH) != 0U) {
|
|
||||||
UART_Transmit_Flush();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
*response = ret;
|
|
||||||
|
|
||||||
return ((1U << 16) | 1U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process UART Status command and prepare response
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t UART_Status (uint8_t *response) {
|
|
||||||
uint32_t rx_cnt, tx_cnt;
|
|
||||||
uint32_t cnt;
|
|
||||||
uint8_t status;
|
|
||||||
|
|
||||||
if ((UartTransport != DAP_UART_TRANSPORT_DAP_COMMAND) ||
|
|
||||||
(UartConfigured == 0U)) {
|
|
||||||
rx_cnt = 0U;
|
|
||||||
tx_cnt = 0U;
|
|
||||||
status = 0U;
|
|
||||||
} else {
|
|
||||||
|
|
||||||
rx_cnt = UartRxIndexI - UartRxIndexO;
|
|
||||||
rx_cnt += pUSART->GetRxCount();
|
|
||||||
if (rx_cnt > (DAP_UART_RX_BUFFER_SIZE - (UART_RX_BLOCK_SIZE*2))) {
|
|
||||||
// Overflow
|
|
||||||
UartErrorRxDataLost = 1U;
|
|
||||||
rx_cnt = (DAP_UART_RX_BUFFER_SIZE - (UART_RX_BLOCK_SIZE*2));
|
|
||||||
UartRxIndexO = UartRxIndexI - rx_cnt;
|
|
||||||
}
|
|
||||||
|
|
||||||
tx_cnt = UartTxIndexI - UartTxIndexO;
|
|
||||||
cnt = pUSART->GetTxCount();
|
|
||||||
if (UartTransmitActive != 0U) {
|
|
||||||
tx_cnt -= cnt;
|
|
||||||
}
|
|
||||||
|
|
||||||
status = UART_Get_Status();
|
|
||||||
}
|
|
||||||
|
|
||||||
*response++ = status;
|
|
||||||
*response++ = (uint8_t)(rx_cnt >> 0);
|
|
||||||
*response++ = (uint8_t)(rx_cnt >> 8);
|
|
||||||
*response++ = (uint8_t)(rx_cnt >> 16);
|
|
||||||
*response++ = (uint8_t)(rx_cnt >> 24);
|
|
||||||
*response++ = (uint8_t)(tx_cnt >> 0);
|
|
||||||
*response++ = (uint8_t)(tx_cnt >> 8);
|
|
||||||
*response++ = (uint8_t)(tx_cnt >> 16);
|
|
||||||
*response = (uint8_t)(tx_cnt >> 24);
|
|
||||||
|
|
||||||
return ((0U << 16) | 9U);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process UART Transfer command and prepare response
|
|
||||||
// request: pointer to request data
|
|
||||||
// response: pointer to response data
|
|
||||||
// return: number of bytes in response (lower 16 bits)
|
|
||||||
// number of bytes in request (upper 16 bits)
|
|
||||||
uint32_t UART_Transfer (const uint8_t *request, uint8_t *response) {
|
|
||||||
uint32_t rx_cnt, tx_cnt;
|
|
||||||
uint32_t rx_num, tx_num;
|
|
||||||
uint8_t *rx_data;
|
|
||||||
const
|
|
||||||
uint8_t *tx_data;
|
|
||||||
uint32_t num;
|
|
||||||
uint32_t index;
|
|
||||||
uint8_t status;
|
|
||||||
|
|
||||||
if (UartTransport != DAP_UART_TRANSPORT_DAP_COMMAND) {
|
|
||||||
status = 0U;
|
|
||||||
rx_cnt = 0U;
|
|
||||||
tx_cnt = 0U;
|
|
||||||
} else {
|
|
||||||
|
|
||||||
// RX Data
|
|
||||||
rx_cnt = ((uint32_t)(*(request+0) << 0) |
|
|
||||||
(uint32_t)(*(request+1) << 8));
|
|
||||||
|
|
||||||
if (rx_cnt > (DAP_PACKET_SIZE - 6U)) {
|
|
||||||
rx_cnt = (DAP_PACKET_SIZE - 6U);
|
|
||||||
}
|
|
||||||
rx_num = UartRxIndexI - UartRxIndexO;
|
|
||||||
rx_num += pUSART->GetRxCount();
|
|
||||||
if (rx_num > (DAP_UART_RX_BUFFER_SIZE - (UART_RX_BLOCK_SIZE*2))) {
|
|
||||||
// Overflow
|
|
||||||
UartErrorRxDataLost = 1U;
|
|
||||||
rx_num = (DAP_UART_RX_BUFFER_SIZE - (UART_RX_BLOCK_SIZE*2));
|
|
||||||
UartRxIndexO = UartRxIndexI - rx_num;
|
|
||||||
}
|
|
||||||
if (rx_cnt > rx_num) {
|
|
||||||
rx_cnt = rx_num;
|
|
||||||
}
|
|
||||||
|
|
||||||
rx_data = (response+5);
|
|
||||||
index = UartRxIndexO & (DAP_UART_RX_BUFFER_SIZE - 1U);
|
|
||||||
if ((index + rx_cnt) <= DAP_UART_RX_BUFFER_SIZE) {
|
|
||||||
memcpy( rx_data, &UartRxBuf[index], rx_cnt);
|
|
||||||
} else {
|
|
||||||
num = DAP_UART_RX_BUFFER_SIZE - index;
|
|
||||||
memcpy( rx_data, &UartRxBuf[index], num);
|
|
||||||
memcpy(&rx_data[num], &UartRxBuf[0], rx_cnt - num);
|
|
||||||
}
|
|
||||||
UartRxIndexO += rx_cnt;
|
|
||||||
|
|
||||||
// TX Data
|
|
||||||
tx_cnt = ((uint32_t)(*(request+2) << 0) |
|
|
||||||
(uint32_t)(*(request+3) << 8));
|
|
||||||
tx_data = (request+4);
|
|
||||||
|
|
||||||
if (tx_cnt > (DAP_PACKET_SIZE - 5U)) {
|
|
||||||
tx_cnt = (DAP_PACKET_SIZE - 5U);
|
|
||||||
}
|
|
||||||
tx_num = UartTxIndexI - UartTxIndexO;
|
|
||||||
num = pUSART->GetTxCount();
|
|
||||||
if (UartTransmitActive != 0U) {
|
|
||||||
tx_num -= num;
|
|
||||||
}
|
|
||||||
if (tx_cnt > (DAP_UART_TX_BUFFER_SIZE - tx_num)) {
|
|
||||||
tx_cnt = (DAP_UART_TX_BUFFER_SIZE - tx_num);
|
|
||||||
}
|
|
||||||
|
|
||||||
index = UartTxIndexI & (DAP_UART_TX_BUFFER_SIZE - 1U);
|
|
||||||
if ((index + tx_cnt) <= DAP_UART_TX_BUFFER_SIZE) {
|
|
||||||
memcpy(&UartTxBuf[index], tx_data, tx_cnt);
|
|
||||||
} else {
|
|
||||||
num = DAP_UART_TX_BUFFER_SIZE - index;
|
|
||||||
memcpy(&UartTxBuf[index], tx_data, num);
|
|
||||||
memcpy(&UartTxBuf[0], &tx_data[num], tx_cnt - num);
|
|
||||||
}
|
|
||||||
UartTxIndexI += tx_cnt;
|
|
||||||
|
|
||||||
if (UartTransmitActive == 0U) {
|
|
||||||
UART_Transmit();
|
|
||||||
}
|
|
||||||
|
|
||||||
status = UART_Get_Status();
|
|
||||||
}
|
|
||||||
|
|
||||||
*response++ = status;
|
|
||||||
*response++ = (uint8_t)(tx_cnt >> 0);
|
|
||||||
*response++ = (uint8_t)(tx_cnt >> 8);
|
|
||||||
*response++ = (uint8_t)(rx_cnt >> 0);
|
|
||||||
*response = (uint8_t)(rx_cnt >> 8);
|
|
||||||
|
|
||||||
return (((4U + tx_cnt) << 16) | (5U + rx_cnt));
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* DAP_UART */
|
|
||||||
@ -1,29 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2019-2021 Arm Limited.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
# not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
cmake_minimum_required(VERSION 3.15.6)
|
|
||||||
|
|
||||||
project(CMSISNN)
|
|
||||||
|
|
||||||
set(CMSIS_PATH "${CMAKE_CURRENT_SOURCE_DIR}/../..")
|
|
||||||
|
|
||||||
option(BUILD_CMSIS_NN_FUNCTIONS "Build CMSIS-NN Source." ON)
|
|
||||||
|
|
||||||
if(BUILD_CMSIS_NN_FUNCTIONS)
|
|
||||||
add_subdirectory(Source)
|
|
||||||
endif()
|
|
||||||
@ -1,169 +0,0 @@
|
|||||||
/******************************************************************************
|
|
||||||
* @file arm_nn_math_types.h
|
|
||||||
* @brief Compiler include and basic types
|
|
||||||
* @version V1.1.0
|
|
||||||
* @date 09 March 2022
|
|
||||||
* Target Processor: Cortex-M
|
|
||||||
******************************************************************************/
|
|
||||||
/*
|
|
||||||
* Copyright (c) 2010-2022 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
Copied from CMSIS/DSP/arm_math_types.h and modified
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _ARM_NN_MATH_TYPES_H_
|
|
||||||
|
|
||||||
#define _ARM_NN_MATH_TYPES_H_
|
|
||||||
|
|
||||||
/* DSP include for enum arm_status. */
|
|
||||||
#include "arm_math_types.h"
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Compiler specific diagnostic adjustment */
|
|
||||||
#if defined(__CC_ARM)
|
|
||||||
|
|
||||||
#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
|
|
||||||
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
|
|
||||||
#elif defined(__ICCARM__)
|
|
||||||
|
|
||||||
#elif defined(__TI_ARM__)
|
|
||||||
|
|
||||||
#elif defined(__CSMC__)
|
|
||||||
|
|
||||||
#elif defined(__TASKING__)
|
|
||||||
|
|
||||||
#elif defined(_MSC_VER)
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error Unknown compiler
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Included for intrinsics definitions */
|
|
||||||
#if defined(_MSC_VER)
|
|
||||||
#include <stdint.h>
|
|
||||||
#ifndef __STATIC_FORCEINLINE
|
|
||||||
#define __STATIC_FORCEINLINE static __forceinline
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_INLINE
|
|
||||||
#define __STATIC_INLINE static __inline
|
|
||||||
#endif
|
|
||||||
#ifndef __ALIGNED
|
|
||||||
#define __ALIGNED(x) __declspec(align(x))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#elif defined(__GNUC_PYTHON__)
|
|
||||||
#include <stdint.h>
|
|
||||||
#ifndef __ALIGNED
|
|
||||||
#define __ALIGNED(x) __attribute__((aligned(x)))
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_FORCEINLINE
|
|
||||||
#define __STATIC_FORCEINLINE static inline __attribute__((always_inline))
|
|
||||||
#endif
|
|
||||||
#ifndef __STATIC_INLINE
|
|
||||||
#define __STATIC_INLINE static inline
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#else
|
|
||||||
#include "cmsis_compiler.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include <float.h>
|
|
||||||
#include <limits.h>
|
|
||||||
#include <math.h>
|
|
||||||
#include <string.h>
|
|
||||||
|
|
||||||
/* evaluate ARM DSP feature */
|
|
||||||
#if (defined(__ARM_FEATURE_DSP) && (__ARM_FEATURE_DSP == 1))
|
|
||||||
#ifndef ARM_MATH_DSP
|
|
||||||
#define ARM_MATH_DSP 1
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if __ARM_FEATURE_MVE
|
|
||||||
#ifndef ARM_MATH_MVEI
|
|
||||||
#define ARM_MATH_MVEI
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Compiler specific diagnostic adjustment */
|
|
||||||
#if defined(__CC_ARM)
|
|
||||||
|
|
||||||
#elif defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
|
|
||||||
|
|
||||||
#elif defined(__GNUC__)
|
|
||||||
// #pragma GCC diagnostic pop
|
|
||||||
|
|
||||||
#elif defined(__ICCARM__)
|
|
||||||
|
|
||||||
#elif defined(__TI_ARM__)
|
|
||||||
|
|
||||||
#elif defined(__CSMC__)
|
|
||||||
|
|
||||||
#elif defined(__TASKING__)
|
|
||||||
|
|
||||||
#elif defined(_MSC_VER)
|
|
||||||
|
|
||||||
#else
|
|
||||||
#error Unknown compiler
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if __ARM_FEATURE_MVE
|
|
||||||
#include <arm_mve.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
extern "C" {
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Add necessary typedefs
|
|
||||||
*/
|
|
||||||
|
|
||||||
#define NN_Q31_MAX ((q31_t)(0x7FFFFFFFL))
|
|
||||||
#define NN_Q15_MAX ((q15_t)(0x7FFF))
|
|
||||||
#define NN_Q7_MAX ((q7_t)(0x7F))
|
|
||||||
#define NN_Q31_MIN ((q31_t)(0x80000000L))
|
|
||||||
#define NN_Q15_MIN ((q15_t)(0x8000))
|
|
||||||
#define NN_Q7_MIN ((q7_t)(0x80))
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Error status returned by some functions in the library.
|
|
||||||
*/
|
|
||||||
|
|
||||||
typedef enum
|
|
||||||
{
|
|
||||||
ARM_CMSIS_NN_SUCCESS = 0, /**< No error */
|
|
||||||
ARM_CMSIS_NN_ARG_ERROR = -1, /**< One or more arguments are incorrect */
|
|
||||||
ARM_CMSIS_NN_NO_IMPL_ERROR = -2, /**< No implementation available */
|
|
||||||
} arm_cmsis_nn_status;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif /*ifndef _ARM_NN_MATH_TYPES_H_ */
|
|
||||||
@ -1,56 +0,0 @@
|
|||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_tables.h
|
|
||||||
* Description: Extern declaration for NN tables
|
|
||||||
*
|
|
||||||
* $Date: 17. August 2021
|
|
||||||
* $Revision: V.1.0.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
/*
|
|
||||||
* Copyright (C) 2010-2018 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
#ifndef _ARM_NN_TABLES_H
|
|
||||||
#define _ARM_NN_TABLES_H
|
|
||||||
|
|
||||||
#include "arm_nn_math_types.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief tables for various activation functions
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
extern const q15_t sigmoidTable_q15[256];
|
|
||||||
extern const q7_t sigmoidTable_q7[256];
|
|
||||||
|
|
||||||
extern const q7_t tanhTable_q7[256];
|
|
||||||
extern const q15_t tanhTable_q15[256];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief 2-way tables for various activation functions
|
|
||||||
*
|
|
||||||
* 2-way table, H table for value larger than 1/4
|
|
||||||
* L table for value smaller than 1/4, H table for remaining
|
|
||||||
* We have this only for the q15_t version. It does not make
|
|
||||||
* sense to have it for q7_t type
|
|
||||||
*/
|
|
||||||
extern const q15_t sigmoidHTable_q15[192];
|
|
||||||
extern const q15_t sigmoidLTable_q15[128];
|
|
||||||
|
|
||||||
#endif /* _ARM_NN_TABLES_H */
|
|
||||||
@ -1,137 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2020-2022 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_types.h
|
|
||||||
* Description: Public header file to contain the CMSIS-NN structs for the
|
|
||||||
* TensorFlowLite micro compliant functions
|
|
||||||
*
|
|
||||||
* $Date: 22. February 2022
|
|
||||||
* $Revision: V.2.1.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#ifndef _ARM_NN_TYPES_H
|
|
||||||
#define _ARM_NN_TYPES_H
|
|
||||||
|
|
||||||
#include <stdint.h>
|
|
||||||
|
|
||||||
/** CMSIS-NN object to contain the width and height of a tile */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int32_t w; /**< Width */
|
|
||||||
int32_t h; /**< Height */
|
|
||||||
} cmsis_nn_tile;
|
|
||||||
|
|
||||||
/** CMSIS-NN object used for the function context. */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
void *buf; /**< Pointer to a buffer needed for the optimization */
|
|
||||||
int32_t size; /**< Buffer size */
|
|
||||||
} cmsis_nn_context;
|
|
||||||
|
|
||||||
/** CMSIS-NN object to contain the dimensions of the tensors */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int32_t n; /**< Generic dimension to contain either the batch size or output channels.
|
|
||||||
Please refer to the function documentation for more information */
|
|
||||||
int32_t h; /**< Height */
|
|
||||||
int32_t w; /**< Width */
|
|
||||||
int32_t c; /**< Input channels */
|
|
||||||
} cmsis_nn_dims;
|
|
||||||
|
|
||||||
/** CMSIS-NN object for the per-channel quantization parameters */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int32_t *multiplier; /**< Multiplier values */
|
|
||||||
int32_t *shift; /**< Shift values */
|
|
||||||
} cmsis_nn_per_channel_quant_params;
|
|
||||||
|
|
||||||
/** CMSIS-NN object for the per-tensor quantization parameters */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int32_t multiplier; /**< Multiplier value */
|
|
||||||
int32_t shift; /**< Shift value */
|
|
||||||
} cmsis_nn_per_tensor_quant_params;
|
|
||||||
|
|
||||||
/** CMSIS-NN object for the quantized Relu activation */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int32_t min; /**< Min value used to clamp the result */
|
|
||||||
int32_t max; /**< Max value used to clamp the result */
|
|
||||||
} cmsis_nn_activation;
|
|
||||||
|
|
||||||
/** CMSIS-NN object for the convolution layer parameters */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int32_t input_offset; /**< Zero value for the input tensor */
|
|
||||||
int32_t output_offset; /**< Zero value for the output tensor */
|
|
||||||
cmsis_nn_tile stride;
|
|
||||||
cmsis_nn_tile padding;
|
|
||||||
cmsis_nn_tile dilation;
|
|
||||||
cmsis_nn_activation activation;
|
|
||||||
} cmsis_nn_conv_params;
|
|
||||||
|
|
||||||
/** CMSIS-NN object for Depthwise convolution layer parameters */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int32_t input_offset; /**< Zero value for the input tensor */
|
|
||||||
int32_t output_offset; /**< Zero value for the output tensor */
|
|
||||||
int32_t ch_mult; /**< Channel Multiplier. ch_mult * in_ch = out_ch */
|
|
||||||
cmsis_nn_tile stride;
|
|
||||||
cmsis_nn_tile padding;
|
|
||||||
cmsis_nn_tile dilation;
|
|
||||||
cmsis_nn_activation activation;
|
|
||||||
} cmsis_nn_dw_conv_params;
|
|
||||||
/** CMSIS-NN object for pooling layer parameters */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
cmsis_nn_tile stride;
|
|
||||||
cmsis_nn_tile padding;
|
|
||||||
cmsis_nn_activation activation;
|
|
||||||
} cmsis_nn_pool_params;
|
|
||||||
|
|
||||||
/** CMSIS-NN object for Fully Connected layer parameters */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int32_t input_offset; /**< Zero value for the input tensor */
|
|
||||||
int32_t filter_offset; /**< Zero value for the filter tensor. Not used */
|
|
||||||
int32_t output_offset; /**< Zero value for the output tensor */
|
|
||||||
cmsis_nn_activation activation;
|
|
||||||
} cmsis_nn_fc_params;
|
|
||||||
|
|
||||||
/** CMSIS-NN object for SVDF layer parameters */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
int32_t rank;
|
|
||||||
int32_t input_offset; /**< Zero value for the input tensor */
|
|
||||||
int32_t output_offset; /**< Zero value for the output tensor */
|
|
||||||
cmsis_nn_activation input_activation;
|
|
||||||
cmsis_nn_activation output_activation;
|
|
||||||
} cmsis_nn_svdf_params;
|
|
||||||
|
|
||||||
/** CMSIS-NN object for Softmax s16 layer parameters */
|
|
||||||
typedef struct
|
|
||||||
{
|
|
||||||
const int16_t *exp_lut;
|
|
||||||
const int16_t *one_by_one_lut;
|
|
||||||
} cmsis_nn_softmax_lut_s16;
|
|
||||||
|
|
||||||
#endif // _ARM_NN_TYPES_H
|
|
||||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,30 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2019-2021 Arm Limited. All rights reserved.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
# not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
project(CMSISNNActivation)
|
|
||||||
|
|
||||||
file(GLOB SRC "./*_s8.c")
|
|
||||||
add_library(CMSISNNActivation STATIC ${SRC})
|
|
||||||
|
|
||||||
### Includes
|
|
||||||
target_include_directories(CMSISNNActivation PUBLIC "${NN}/Include")
|
|
||||||
target_include_directories(CMSISNNActivation PUBLIC "${ROOT}/CMSIS/Core/Include")
|
|
||||||
target_include_directories(CMSISNNActivation PUBLIC "${ROOT}/CMSIS/DSP/Include")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,96 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_activations_q15.c
|
|
||||||
* Description: Q15 neural network activation function using direct table look-up
|
|
||||||
*
|
|
||||||
* $Date: 09. October 2020
|
|
||||||
* $Revision: V.1.0.1
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nn_tables.h"
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup Acti
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief neural network activation function using direct table look-up
|
|
||||||
*
|
|
||||||
* @note Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
void arm_nn_activations_direct_q15(q15_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type)
{
    /* The top 8 bits of each sample select a table entry; the bits below
     * them form the fraction used for linear interpolation. */
    const uint16_t shift_size = 8 + 3 - int_width;
    const uint32_t bit_mask = 0x7FF >> int_width;
    const uint32_t full_frac = bit_mask + 1;

    /* Sigmoid only when explicitly requested; every other type value,
     * including ARM_TANH, uses the tanh table. */
    const q15_t *const table = (type == ARM_SIGMOID) ? sigmoidTable_q15 : tanhTable_q15;
    uint16_t idx;

    /* The activation is applied in place, element by element. */
    for (idx = 0; idx < size; idx++)
    {
        const q15_t in = data[idx];
        const q15_t frac = (uint32_t)in & bit_mask;
        const q15_t lower = table[(uint8_t)(in >> shift_size)];
        q15_t result = lower;

        /* 0x7f is the largest positive index: it has no right-hand
         * neighbour to interpolate towards, so the entry is used as is. */
        if ((in >> shift_size) != 0x7f)
        {
            const q15_t upper = table[(uint8_t)(1 + ((uint8_t)(in >> shift_size)))];

            /* Interpolate between the two neighbouring entries for better accuracy. */
            result = ((q31_t)(full_frac - frac) * lower + (q31_t)upper * frac) >> shift_size;
        }

        data[idx] = result;
    }
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Acti group
|
|
||||||
*/
|
|
||||||
@ -1,89 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_activations_q7.c
|
|
||||||
* Description: Q7 neural network activation function using direct table look-up
|
|
||||||
*
|
|
||||||
* $Date: 09. October 2020
|
|
||||||
* $Revision: V.1.0.1
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nn_tables.h"
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup Acti
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Q7 neural network activation function using direct table look-up
|
|
||||||
* @param[in,out] data pointer to input
|
|
||||||
* @param[in] size number of elements
|
|
||||||
* @param[in] int_width bit-width of the integer part, assumed to be no larger than 3
|
|
||||||
* @param[in] type type of activation functions
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* This is the direct table look-up approach.
|
|
||||||
*
|
|
||||||
* Assume here the integer part of the fixed-point is <= 3.
|
|
||||||
* More than 3 just not making much sense, makes no difference with
|
|
||||||
* saturation followed by any of these activation functions.
|
|
||||||
*/
|
|
||||||
|
|
||||||
void arm_nn_activations_direct_q7(q7_t *data, uint16_t size, uint16_t int_width, arm_nn_activation_type type)
{
    /* Right shift that turns a sample into its table index. */
    const uint16_t shift_size = 3 - int_width;

    /* Sigmoid only when explicitly requested; every other type value,
     * including ARM_TANH, uses the tanh table. */
    const q7_t *const table = (type == ARM_SIGMOID) ? sigmoidTable_q7 : tanhTable_q7;
    q7_t *cursor = data;
    q7_t *const end = data + size;

    /* In-place lookup: each sample is replaced by its table entry. */
    for (; cursor != end; cursor++)
    {
        *cursor = table[(uint8_t)(*cursor >> shift_size)];
    }
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Acti group
|
|
||||||
*/
|
|
||||||
@ -1,65 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2019 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_relu6_s8.c
|
|
||||||
* Description: Basic s8 version of ReLU6
|
|
||||||
*
|
|
||||||
* $Date: 09. October 2020
|
|
||||||
* $Revision: V.1.0.1
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup Acti
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Basic ReLU6 function
|
|
||||||
*
|
|
||||||
* Refer to header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
void arm_relu6_s8(q7_t *data, uint16_t size)
{
    q7_t *cursor = data;
    q7_t *const end = data + size;

    /* Clamp every element in place to the range [0, 6]. */
    while (cursor != end)
    {
        int32_t value = *cursor;

        value = MAX(value, 0);
        *cursor = MIN(value, 6);
        cursor++;
    }
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Acti group
|
|
||||||
*/
|
|
||||||
@ -1,104 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_relu_q15.c
|
|
||||||
* Description: Q15 version of ReLU
|
|
||||||
*
|
|
||||||
* $Date: 09. October 2020
|
|
||||||
* $Revision: V.1.0.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup Acti
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Q15 RELU function
|
|
||||||
* @param[in,out] data pointer to input
|
|
||||||
* @param[in] size number of elements
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* Optimized relu with QSUB instructions.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
void arm_relu_q15(q15_t *data, uint16_t size)
{
#if defined(ARM_MATH_DSP)
    /* DSP-extension path: handle two q15 values per iteration. */
    q15_t *src = data;
    q15_t *dst = data;
    uint16_t pairs;

    for (pairs = size >> 1; pairs != 0; pairs--)
    {
        const q31_t packed = read_q15x2_ia(&src);

        /* Move each halfword's sign bit down to that halfword's bit 0. */
        const q31_t sign_bits = __ROR(packed & 0x80008000, 15);

        /* 0 - sign per 16-bit lane: a lane becomes 0xFFFF when its input
         * was negative and 0x0000 otherwise. */
        const q31_t negative_lanes = __QSUB16(0x00000000, sign_bits);

        /* Clear the negative lanes, keep the others unchanged. */
        arm_nn_write_q15x2_ia(&dst, packed & (~negative_lanes));
    }

    /* Odd element count: the last value is handled on its own. */
    if ((size & 0x1) && (*src < 0))
    {
        *src = 0;
    }
#else
    /* Reference path for cores without the DSP extension. */
    q15_t *cursor = data;
    q15_t *const end = data + size;

    for (; cursor != end; cursor++)
    {
        if (*cursor < 0)
        {
            *cursor = 0;
        }
    }
#endif /* ARM_MATH_DSP */
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Acti group
|
|
||||||
*/
|
|
||||||
@ -1,109 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_relu_q7.c
|
|
||||||
* Description: Q7 version of ReLU
|
|
||||||
*
|
|
||||||
* $Date: 20. July 2021
|
|
||||||
* $Revision: V.1.1.3
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup Acti
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Q7 RELU function
|
|
||||||
* @param[in,out] data pointer to input
|
|
||||||
* @param[in] size number of elements
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* Optimized relu with QSUB instructions.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
void arm_relu_q7(q7_t *data, uint16_t size)
{
    /*
     * In-place ReLU: every negative element of data[0..size-1] is replaced by 0,
     * non-negative elements are left untouched.
     */
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
    /* DSP-extension path: handle four q7 elements per iteration through one
     * 32-bit word. arm_nn_read_q7x4_ia / arm_nn_write_q7x4_ia are declared
     * elsewhere; presumably they move one packed quad and advance the pointer -
     * confirm against the support headers. */
    q7_t *src = data;
    q7_t *dst = data;
    uint16_t remaining;

    for (remaining = size >> 2; remaining != 0; remaining--)
    {
        const q31_t packed = arm_nn_read_q7x4_ia((const q7_t **)&src);

        /* Rotate the sign bit of each byte down to that byte's bit 0 */
        const q31_t sign_bits = (int32_t)__ROR((uint32_t)packed & 0x80808080, 7);

        /* 0 - 1 per byte: 0xFF where the element was negative, 0x00 otherwise */
        const q31_t negative_mask = __QSUB8(0x00000000, sign_bits);

        /* Clear the negative bytes, keep the others */
        arm_nn_write_q7x4_ia(&dst, packed & (~negative_mask));
    }

    /* Up to three leftover elements are processed one at a time */
    for (remaining = size & 0x3; remaining != 0; remaining--)
    {
        if (*src < 0)
        {
            *src = 0;
        }
        src++;
    }
#else
    /* Reference path for builds that do not take the DSP branch above */
    uint16_t idx = 0;

    while (idx < size)
    {
        if (data[idx] < 0)
        {
            data[idx] = 0;
        }
        idx++;
    }
#endif
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Acti group
|
|
||||||
*/
|
|
||||||
@ -1,31 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2019-2021 Arm Limited. All rights reserved.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
# not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
project(CMSISNNBasicMaths)
|
|
||||||
|
|
||||||
file(GLOB SRC "./*_*.c")
|
|
||||||
|
|
||||||
add_library(CMSISNNBasicMaths STATIC ${SRC})
|
|
||||||
|
|
||||||
### Includes
|
|
||||||
target_include_directories(CMSISNNBasicMaths PUBLIC "${NN}/Include")
|
|
||||||
target_include_directories(CMSISNNBasicMaths PUBLIC "${ROOT}/CMSIS/Core/Include")
|
|
||||||
target_include_directories(CMSISNNBasicMaths PUBLIC "${ROOT}/CMSIS/DSP/Include")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,105 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2022 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_elementwise_add_s16
|
|
||||||
* Description: Elementwise add
|
|
||||||
*
|
|
||||||
* $Date: 14 February 2022
|
|
||||||
* $Revision: V.1.0.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M CPUs
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup BasicMath
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* s16 elementwise add
|
|
||||||
*
|
|
||||||
* Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Note: __SHIFT is expected to be <=0 */
|
|
||||||
|
|
||||||
arm_status arm_elementwise_add_s16(const int16_t *input_1_vect,
|
|
||||||
const int16_t *input_2_vect,
|
|
||||||
const int32_t input_1_offset,
|
|
||||||
const int32_t input_1_mult,
|
|
||||||
const int32_t input_1_shift,
|
|
||||||
const int32_t input_2_offset,
|
|
||||||
const int32_t input_2_mult,
|
|
||||||
const int32_t input_2_shift,
|
|
||||||
const int32_t left_shift,
|
|
||||||
int16_t *output,
|
|
||||||
const int32_t out_offset,
|
|
||||||
const int32_t out_mult,
|
|
||||||
const int32_t out_shift,
|
|
||||||
const int32_t out_activation_min,
|
|
||||||
const int32_t out_activation_max,
|
|
||||||
const int32_t block_size)
|
|
||||||
{
|
|
||||||
(void)input_1_offset;
|
|
||||||
(void)input_2_offset;
|
|
||||||
(void)out_offset;
|
|
||||||
int32_t loop_count;
|
|
||||||
int32_t input_1;
|
|
||||||
int32_t input_2;
|
|
||||||
int32_t sum;
|
|
||||||
|
|
||||||
loop_count = block_size;
|
|
||||||
|
|
||||||
while (loop_count > 0)
|
|
||||||
{
|
|
||||||
/* C = A + B */
|
|
||||||
input_1 = *input_1_vect++ << left_shift;
|
|
||||||
input_2 = *input_2_vect++ << left_shift;
|
|
||||||
|
|
||||||
input_1 = arm_nn_requantize(input_1, input_1_mult, input_1_shift);
|
|
||||||
input_2 = arm_nn_requantize(input_2, input_2_mult, input_2_shift);
|
|
||||||
|
|
||||||
sum = input_1 + input_2;
|
|
||||||
sum = arm_nn_requantize(sum, out_mult, out_shift);
|
|
||||||
|
|
||||||
sum = MAX(sum, out_activation_min);
|
|
||||||
sum = MIN(sum, out_activation_max);
|
|
||||||
|
|
||||||
*output++ = (int16_t)sum;
|
|
||||||
|
|
||||||
/* Decrement loop counter */
|
|
||||||
loop_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
return (ARM_MATH_SUCCESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicMath group
|
|
||||||
*/
|
|
||||||
@ -1,255 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_elementwise_add_s8
|
|
||||||
* Description: Element wise add
|
|
||||||
*
|
|
||||||
* $Date: 01. March 2021
|
|
||||||
* $Revision: V.2.5.3
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M CPUs
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
#if defined(ARM_MATH_MVEI)
|
|
||||||
#include "arm_helium_utils.h"
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_MVEI)
/**
 * @brief Rescale an MVE vector in place (vector counterpart of SAT_INPUT).
 *
 * Applies arm_doubling_high_mult_mve() with __MULT, then
 * arm_divide_by_power_of_two_mve() with the negated __SHIFT, and stores the
 * result back into __INPUT_V.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement and
 * is safe inside an unbraced if/else. Arguments are parenthesized so that an
 * expression passed as __SHIFT is negated as a whole.
 */
#define SAT_INPUT_VECT(__INPUT_V, __MULT, __SHIFT)                                                                     \
    do                                                                                                                 \
    {                                                                                                                  \
        (__INPUT_V) = arm_doubling_high_mult_mve((__INPUT_V), (__MULT));                                               \
        (__INPUT_V) = arm_divide_by_power_of_two_mve((__INPUT_V), -(__SHIFT));                                         \
    } while (0)
#endif

/**
 * @brief Rescale a scalar in place.
 *
 * Applies arm_nn_doubling_high_mult_no_sat() with __MULT, then
 * arm_nn_divide_by_power_of_two() with the negated __SHIFT, and stores the
 * result back into __INPUT.
 *
 * @note The "_no_sat" suffix of the multiply helper does not mean that the input
 *       is left unsaturated: since __MULT is a positive integer, the result is
 *       saturated. The API definition of that helper has more information.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement and
 * is safe inside an unbraced if/else. Arguments are parenthesized so that an
 * expression passed as __SHIFT is negated as a whole.
 */
#define SAT_INPUT(__INPUT, __MULT, __SHIFT)                                                                            \
    do                                                                                                                 \
    {                                                                                                                  \
        (__INPUT) = arm_nn_doubling_high_mult_no_sat((__INPUT), (__MULT));                                             \
        (__INPUT) = arm_nn_divide_by_power_of_two((__INPUT), -(__SHIFT));                                              \
    } while (0)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup BasicMath
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* s8 element wise add
|
|
||||||
*
|
|
||||||
* Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Note: __SHIFT is expected to be <=0 */
|
|
||||||
|
|
||||||
arm_status arm_elementwise_add_s8(const int8_t *input_1_vect,
|
|
||||||
const int8_t *input_2_vect,
|
|
||||||
const int32_t input_1_offset,
|
|
||||||
const int32_t input_1_mult,
|
|
||||||
const int32_t input_1_shift,
|
|
||||||
const int32_t input_2_offset,
|
|
||||||
const int32_t input_2_mult,
|
|
||||||
const int32_t input_2_shift,
|
|
||||||
const int32_t left_shift,
|
|
||||||
int8_t *output,
|
|
||||||
const int32_t out_offset,
|
|
||||||
const int32_t out_mult,
|
|
||||||
const int32_t out_shift,
|
|
||||||
const int32_t out_activation_min,
|
|
||||||
const int32_t out_activation_max,
|
|
||||||
const uint32_t block_size)
|
|
||||||
{
|
|
||||||
#if defined(ARM_MATH_MVEI)
|
|
||||||
int32_t count = (int32_t)block_size;
|
|
||||||
|
|
||||||
while (count > 0)
|
|
||||||
{
|
|
||||||
int32x4_t vect_1;
|
|
||||||
int32x4_t vect_2;
|
|
||||||
|
|
||||||
mve_pred16_t p = vctp32q((uint32_t)count);
|
|
||||||
|
|
||||||
vect_1 = vldrbq_z_s32(input_1_vect, p);
|
|
||||||
vect_2 = vldrbq_z_s32(input_2_vect, p);
|
|
||||||
|
|
||||||
vect_1 = vaddq_s32(vect_1, vdupq_n_s32(input_1_offset));
|
|
||||||
vect_2 = vaddq_s32(vect_2, vdupq_n_s32(input_2_offset));
|
|
||||||
|
|
||||||
vect_1 = vshlq_r_s32(vect_1, left_shift);
|
|
||||||
vect_2 = vshlq_r_s32(vect_2, left_shift);
|
|
||||||
|
|
||||||
SAT_INPUT_VECT(vect_1, input_1_mult, input_1_shift);
|
|
||||||
SAT_INPUT_VECT(vect_2, input_2_mult, input_2_shift);
|
|
||||||
|
|
||||||
vect_1 = vaddq_s32(vect_1, vect_2);
|
|
||||||
SAT_INPUT_VECT(vect_1, out_mult, out_shift);
|
|
||||||
|
|
||||||
vect_1 = vaddq_n_s32(vect_1, out_offset);
|
|
||||||
|
|
||||||
vect_1 = vmaxq_s32(vect_1, vdupq_n_s32(out_activation_min));
|
|
||||||
vect_1 = vminq_s32(vect_1, vdupq_n_s32(out_activation_max));
|
|
||||||
|
|
||||||
input_1_vect += 4;
|
|
||||||
input_2_vect += 4;
|
|
||||||
vstrbq_p_s32(output, vect_1, p);
|
|
||||||
|
|
||||||
output += 4;
|
|
||||||
count -= 4;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
uint32_t loop_count;
|
|
||||||
int32_t input_1;
|
|
||||||
int32_t input_2;
|
|
||||||
int32_t sum;
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_DSP)
|
|
||||||
int32_t a_1, b_1, a_2, b_2;
|
|
||||||
|
|
||||||
int32_t offset_1_packed, offset_2_packed;
|
|
||||||
|
|
||||||
int8_t r1, r2, r3, r4;
|
|
||||||
|
|
||||||
offset_1_packed = (input_1_offset << 16U) | (input_1_offset & 0x0FFFFL);
|
|
||||||
offset_2_packed = (input_2_offset << 16U) | (input_2_offset & 0x0FFFFL);
|
|
||||||
|
|
||||||
loop_count = block_size >> 2;
|
|
||||||
|
|
||||||
while (loop_count > 0U)
|
|
||||||
{
|
|
||||||
/* 4 outputs are calculated in one loop. The order of calculation is follows the order of output sign extension
|
|
||||||
intrinsic */
|
|
||||||
input_1_vect = read_and_pad_reordered(input_1_vect, &b_1, &a_1);
|
|
||||||
input_2_vect = read_and_pad_reordered(input_2_vect, &b_2, &a_2);
|
|
||||||
|
|
||||||
a_1 = __SADD16(a_1, offset_1_packed);
|
|
||||||
b_1 = __SADD16(b_1, offset_1_packed);
|
|
||||||
|
|
||||||
a_2 = __SADD16(a_2, offset_2_packed);
|
|
||||||
b_2 = __SADD16(b_2, offset_2_packed);
|
|
||||||
|
|
||||||
/* Sum 1 */
|
|
||||||
input_1 = (b_1 & 0x0FFFF) << left_shift;
|
|
||||||
|
|
||||||
SAT_INPUT(input_1, input_1_mult, input_1_shift);
|
|
||||||
|
|
||||||
input_2 = (b_2 & 0x0FFFF) << left_shift;
|
|
||||||
SAT_INPUT(input_2, input_2_mult, input_2_shift);
|
|
||||||
|
|
||||||
sum = input_1 + input_2;
|
|
||||||
SAT_INPUT(sum, out_mult, out_shift);
|
|
||||||
sum += out_offset;
|
|
||||||
sum = MAX(sum, out_activation_min);
|
|
||||||
sum = MIN(sum, out_activation_max);
|
|
||||||
r1 = (q7_t)sum;
|
|
||||||
|
|
||||||
/* Sum 3 */
|
|
||||||
input_1 = ((b_1 >> 16) & 0x0FFFF) << left_shift;
|
|
||||||
SAT_INPUT(input_1, input_1_mult, input_1_shift);
|
|
||||||
|
|
||||||
input_2 = ((b_2 >> 16) & 0x0FFFF) << left_shift;
|
|
||||||
SAT_INPUT(input_2, input_2_mult, input_2_shift);
|
|
||||||
|
|
||||||
sum = input_1 + input_2;
|
|
||||||
SAT_INPUT(sum, out_mult, out_shift);
|
|
||||||
sum += out_offset;
|
|
||||||
sum = MAX(sum, out_activation_min);
|
|
||||||
sum = MIN(sum, out_activation_max);
|
|
||||||
r3 = (q7_t)sum;
|
|
||||||
|
|
||||||
/* Sum 2 */
|
|
||||||
input_1 = (a_1 & 0x0FFFF) << left_shift;
|
|
||||||
SAT_INPUT(input_1, input_1_mult, input_1_shift);
|
|
||||||
|
|
||||||
input_2 = (a_2 & 0x0FFFF) << left_shift;
|
|
||||||
SAT_INPUT(input_2, input_2_mult, input_2_shift);
|
|
||||||
|
|
||||||
sum = input_1 + input_2;
|
|
||||||
SAT_INPUT(sum, out_mult, out_shift);
|
|
||||||
sum += out_offset;
|
|
||||||
sum = MAX(sum, out_activation_min);
|
|
||||||
sum = MIN(sum, out_activation_max);
|
|
||||||
r2 = (q7_t)sum;
|
|
||||||
|
|
||||||
/* Sum 4 */
|
|
||||||
input_1 = ((a_1 >> 16) & 0x0FFFF) << left_shift;
|
|
||||||
SAT_INPUT(input_1, input_1_mult, input_1_shift);
|
|
||||||
|
|
||||||
input_2 = ((a_2 >> 16) & 0x0FFFF) << left_shift;
|
|
||||||
SAT_INPUT(input_2, input_2_mult, input_2_shift);
|
|
||||||
|
|
||||||
sum = input_1 + input_2;
|
|
||||||
SAT_INPUT(sum, out_mult, out_shift);
|
|
||||||
sum += out_offset;
|
|
||||||
sum = MAX(sum, out_activation_min);
|
|
||||||
sum = MIN(sum, out_activation_max);
|
|
||||||
r4 = (q7_t)sum;
|
|
||||||
|
|
||||||
write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4));
|
|
||||||
|
|
||||||
loop_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
loop_count = block_size & 0x3;
|
|
||||||
#else
|
|
||||||
loop_count = block_size;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
while (loop_count > 0U)
|
|
||||||
{
|
|
||||||
/* C = A + B */
|
|
||||||
|
|
||||||
input_1 = (*input_1_vect++ + input_1_offset) << left_shift;
|
|
||||||
input_2 = (*input_2_vect++ + input_2_offset) << left_shift;
|
|
||||||
|
|
||||||
input_1 = arm_nn_doubling_high_mult(input_1, input_1_mult);
|
|
||||||
input_1 = arm_nn_divide_by_power_of_two(input_1, -input_1_shift);
|
|
||||||
|
|
||||||
input_2 = arm_nn_doubling_high_mult(input_2, input_2_mult);
|
|
||||||
input_2 = arm_nn_divide_by_power_of_two(input_2, -input_2_shift);
|
|
||||||
|
|
||||||
sum = input_1 + input_2;
|
|
||||||
SAT_INPUT(sum, out_mult, out_shift);
|
|
||||||
sum += out_offset;
|
|
||||||
|
|
||||||
sum = MAX(sum, out_activation_min);
|
|
||||||
sum = MIN(sum, out_activation_max);
|
|
||||||
|
|
||||||
*output++ = (q7_t)sum;
|
|
||||||
|
|
||||||
/* Decrement loop counter */
|
|
||||||
loop_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_MVEI */
|
|
||||||
|
|
||||||
return (ARM_MATH_SUCCESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicMath group
|
|
||||||
*/
|
|
||||||
@ -1,95 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2022 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_elementwise_mul_s16
|
|
||||||
* Description: Element wise multiplication
|
|
||||||
*
|
|
||||||
* $Date: 14 February 2022
|
|
||||||
* $Revision: V.1.0.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup BasicMath
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief s16 element wise multiplication of two vectors
|
|
||||||
*
|
|
||||||
* @note Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
arm_status arm_elementwise_mul_s16(const int16_t *input_1_vect,
|
|
||||||
const int16_t *input_2_vect,
|
|
||||||
const int32_t input_1_offset,
|
|
||||||
const int32_t input_2_offset,
|
|
||||||
int16_t *output,
|
|
||||||
const int32_t out_offset,
|
|
||||||
const int32_t out_mult,
|
|
||||||
const int32_t out_shift,
|
|
||||||
const int32_t out_activation_min,
|
|
||||||
const int32_t out_activation_max,
|
|
||||||
const int32_t block_size)
|
|
||||||
{
|
|
||||||
(void)input_1_offset;
|
|
||||||
(void)input_2_offset;
|
|
||||||
(void)out_offset;
|
|
||||||
int32_t loop_count;
|
|
||||||
int32_t input_1;
|
|
||||||
int32_t input_2;
|
|
||||||
int32_t mul_res;
|
|
||||||
|
|
||||||
loop_count = block_size;
|
|
||||||
|
|
||||||
while (loop_count > 0)
|
|
||||||
{
|
|
||||||
/* C = A * B */
|
|
||||||
|
|
||||||
input_1 = *input_1_vect++;
|
|
||||||
input_2 = *input_2_vect++;
|
|
||||||
|
|
||||||
mul_res = input_1 * input_2;
|
|
||||||
mul_res = arm_nn_requantize(mul_res, out_mult, out_shift);
|
|
||||||
|
|
||||||
mul_res = MAX(mul_res, out_activation_min);
|
|
||||||
mul_res = MIN(mul_res, out_activation_max);
|
|
||||||
|
|
||||||
*output++ = (int16_t)mul_res;
|
|
||||||
|
|
||||||
/* Decrement loop counter */
|
|
||||||
loop_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicMath group
|
|
||||||
*/
|
|
||||||
@ -1,200 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_elementwise_mul_s8
|
|
||||||
* Description: Element wise multiplication
|
|
||||||
*
|
|
||||||
* $Date: January 26, 2021
|
|
||||||
* $Revision: V.1.0.5
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup BasicMath
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief s8 element wise multiplication of two vectors
|
|
||||||
*
|
|
||||||
* @note Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_elementwise_mul_s8(const int8_t *input_1_vect,
|
|
||||||
const int8_t *input_2_vect,
|
|
||||||
const int32_t input_1_offset,
|
|
||||||
const int32_t input_2_offset,
|
|
||||||
int8_t *output,
|
|
||||||
const int32_t out_offset,
|
|
||||||
const int32_t out_mult,
|
|
||||||
const int32_t out_shift,
|
|
||||||
const int32_t out_activation_min,
|
|
||||||
const int32_t out_activation_max,
|
|
||||||
const uint32_t block_size)
|
|
||||||
{
|
|
||||||
|
|
||||||
int32_t loop_count;
|
|
||||||
#if defined(ARM_MATH_MVEI)
|
|
||||||
|
|
||||||
loop_count = (block_size + 3) / 4;
|
|
||||||
uint32_t num_elements = block_size;
|
|
||||||
|
|
||||||
for (int i = 0; i < loop_count; i++)
|
|
||||||
{
|
|
||||||
mve_pred16_t p = vctp32q(num_elements);
|
|
||||||
|
|
||||||
int32x4_t input_1 = vldrbq_z_s32(input_1_vect, p);
|
|
||||||
input_1 = vaddq_n_s32(input_1, input_1_offset);
|
|
||||||
|
|
||||||
int32x4_t input_2 = vldrbq_z_s32(input_2_vect, p);
|
|
||||||
input_2 = vaddq_n_s32(input_2, input_2_offset);
|
|
||||||
|
|
||||||
int32x4_t res_0 = vmulq_s32(input_1, input_2);
|
|
||||||
|
|
||||||
res_0 = arm_requantize_mve_32x4(res_0, vdupq_n_s32(out_mult), vdupq_n_s32(out_shift));
|
|
||||||
|
|
||||||
res_0 += vdupq_n_s32(out_offset);
|
|
||||||
|
|
||||||
res_0 = vmaxq_s32(res_0, vdupq_n_s32(out_activation_min));
|
|
||||||
res_0 = vminq_s32(res_0, vdupq_n_s32(out_activation_max));
|
|
||||||
|
|
||||||
vstrbq_p_s32(output, res_0, p);
|
|
||||||
input_1_vect += 4;
|
|
||||||
input_2_vect += 4;
|
|
||||||
output += 4;
|
|
||||||
num_elements -= 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
int32_t input_1;
|
|
||||||
int32_t input_2;
|
|
||||||
int32_t mul_res;
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_DSP)
|
|
||||||
int32_t a_1, b_1, a_2, b_2;
|
|
||||||
|
|
||||||
int32_t offset_1_packed, offset_2_packed;
|
|
||||||
|
|
||||||
int8_t r1, r2, r3, r4;
|
|
||||||
|
|
||||||
offset_1_packed = (input_1_offset << 16U) | (input_1_offset & 0x0FFFFL);
|
|
||||||
offset_2_packed = (input_2_offset << 16U) | (input_2_offset & 0x0FFFFL);
|
|
||||||
|
|
||||||
loop_count = block_size >> 2;
|
|
||||||
|
|
||||||
while (loop_count > 0)
|
|
||||||
{
|
|
||||||
/* 4 outputs are calculated in one loop. The order of calculation is follows the order of output sign extension
|
|
||||||
intrinsic */
|
|
||||||
input_1_vect = read_and_pad_reordered(input_1_vect, &b_1, &a_1);
|
|
||||||
input_2_vect = read_and_pad_reordered(input_2_vect, &b_2, &a_2);
|
|
||||||
|
|
||||||
a_1 = __SADD16(a_1, offset_1_packed);
|
|
||||||
b_1 = __SADD16(b_1, offset_1_packed);
|
|
||||||
|
|
||||||
a_2 = __SADD16(a_2, offset_2_packed);
|
|
||||||
b_2 = __SADD16(b_2, offset_2_packed);
|
|
||||||
|
|
||||||
/* Mul 1 */
|
|
||||||
input_1 = (int16_t)(b_1 & 0x0FFFFL);
|
|
||||||
input_2 = (int16_t)(b_2 & 0x0FFFFL);
|
|
||||||
|
|
||||||
mul_res = input_1 * input_2;
|
|
||||||
mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset;
|
|
||||||
|
|
||||||
mul_res = MAX(mul_res, out_activation_min);
|
|
||||||
mul_res = MIN(mul_res, out_activation_max);
|
|
||||||
r1 = (q7_t)mul_res;
|
|
||||||
|
|
||||||
/* Mul 3 */
|
|
||||||
input_1 = (int16_t)((b_1 >> 16U) & 0x0FFFFL);
|
|
||||||
input_2 = (int16_t)((b_2 >> 16U) & 0x0FFFFL);
|
|
||||||
|
|
||||||
mul_res = input_1 * input_2;
|
|
||||||
mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset;
|
|
||||||
mul_res = MAX(mul_res, out_activation_min);
|
|
||||||
mul_res = MIN(mul_res, out_activation_max);
|
|
||||||
r3 = (q7_t)mul_res;
|
|
||||||
|
|
||||||
/* Mul 2 */
|
|
||||||
input_1 = (int16_t)(a_1 & 0x0FFFFL);
|
|
||||||
input_2 = (int16_t)(a_2 & 0x0FFFFL);
|
|
||||||
|
|
||||||
mul_res = input_1 * input_2;
|
|
||||||
mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset;
|
|
||||||
mul_res = MAX(mul_res, out_activation_min);
|
|
||||||
mul_res = MIN(mul_res, out_activation_max);
|
|
||||||
r2 = (q7_t)mul_res;
|
|
||||||
|
|
||||||
/* Mul 4 */
|
|
||||||
input_1 = (int16_t)((a_1 >> 16U) & 0x0FFFFL);
|
|
||||||
input_2 = (int16_t)((a_2 >> 16U) & 0x0FFFFL);
|
|
||||||
|
|
||||||
mul_res = input_1 * input_2;
|
|
||||||
mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset;
|
|
||||||
mul_res = MAX(mul_res, out_activation_min);
|
|
||||||
mul_res = MIN(mul_res, out_activation_max);
|
|
||||||
r4 = (q7_t)mul_res;
|
|
||||||
|
|
||||||
write_q7x4_ia(&output, __PACKq7(r1, r2, r3, r4));
|
|
||||||
|
|
||||||
loop_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
loop_count = block_size & 0x3;
|
|
||||||
#else
|
|
||||||
loop_count = block_size;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
while (loop_count > 0)
|
|
||||||
{
|
|
||||||
/* C = A * B */
|
|
||||||
|
|
||||||
input_1 = *input_1_vect++ + input_1_offset;
|
|
||||||
input_2 = *input_2_vect++ + input_2_offset;
|
|
||||||
|
|
||||||
mul_res = input_1 * input_2;
|
|
||||||
mul_res = arm_nn_requantize(mul_res, out_mult, out_shift) + out_offset;
|
|
||||||
|
|
||||||
mul_res = MAX(mul_res, out_activation_min);
|
|
||||||
mul_res = MIN(mul_res, out_activation_max);
|
|
||||||
|
|
||||||
*output++ = (q7_t)mul_res;
|
|
||||||
|
|
||||||
/* Decrement loop counter */
|
|
||||||
loop_count--;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicMath group
|
|
||||||
*/
|
|
||||||
@ -1,98 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2019-2021 Arm Limited.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
# not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
SET(ROOT ${CMSIS_PATH})
|
|
||||||
|
|
||||||
# Select which parts of CMSIS-NN must be compiled.
|
|
||||||
# There are some dependencies between the parts but they are not tracked
|
|
||||||
# by this cmake. So, enabling some functions may require to enable some
|
|
||||||
# other ones.
|
|
||||||
option(CONCATENATION "Concatenation" ON)
|
|
||||||
option(FULLYCONNECTED "Fully Connected" ON)
|
|
||||||
option(CONVOLUTION "Convolutions" ON)
|
|
||||||
option(ACTIVATION "Activations" ON)
|
|
||||||
option(POOLING "Pooling" ON)
|
|
||||||
option(SOFTMAX "Softmax" ON)
|
|
||||||
option(BASICMATHSNN "Basic Maths for NN" ON)
|
|
||||||
option(RESHAPE "Reshape" ON)
|
|
||||||
option(SVDF "SVDF" ON)
|
|
||||||
|
|
||||||
# When OFF it is the default behavior : all tables are included.
|
|
||||||
option(NNSUPPORT "NN Support" ON)
|
|
||||||
|
|
||||||
|
|
||||||
###########################
|
|
||||||
#
|
|
||||||
# CMSIS NN
|
|
||||||
#
|
|
||||||
###########################
|
|
||||||
|
|
||||||
# NN Sources
|
|
||||||
SET(NN ${ROOT}/CMSIS/NN)
|
|
||||||
|
|
||||||
list(APPEND CMAKE_MODULE_PATH ${NN}/Source)
|
|
||||||
|
|
||||||
add_library(cmsis-nn STATIC)
|
|
||||||
|
|
||||||
target_compile_options(cmsis-nn PRIVATE -Ofast)
|
|
||||||
|
|
||||||
### Includes
|
|
||||||
target_include_directories(cmsis-nn PUBLIC "${NN}/Include")
|
|
||||||
target_include_directories(cmsis-nn PUBLIC "${ROOT}/CMSIS/Core/Include")
|
|
||||||
target_include_directories(cmsis-nn PUBLIC "${ROOT}/CMSIS/DSP/Include")
|
|
||||||
|
|
||||||
if (BASICMATHSNN)
|
|
||||||
add_subdirectory(BasicMathFunctions)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (CONCATENATION)
|
|
||||||
add_subdirectory(ConcatenationFunctions)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (FULLYCONNECTED)
|
|
||||||
add_subdirectory(FullyConnectedFunctions)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (CONVOLUTION)
|
|
||||||
add_subdirectory(ConvolutionFunctions)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (ACTIVATION)
|
|
||||||
add_subdirectory(ActivationFunctions)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (POOLING)
|
|
||||||
add_subdirectory(PoolingFunctions)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (SOFTMAX)
|
|
||||||
add_subdirectory(SoftmaxFunctions)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (SVDF)
|
|
||||||
add_subdirectory(SVDFunctions)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (RESHAPE)
|
|
||||||
add_subdirectory(ReshapeFunctions)
|
|
||||||
endif()
|
|
||||||
|
|
||||||
# Keep NNSUPPORT at the end
|
|
||||||
if (NNSUPPORT)
|
|
||||||
add_subdirectory(NNSupportFunctions)
|
|
||||||
endif()
|
|
||||||
@ -1,20 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2019-2021 Arm Limited.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
# not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
file(GLOB SRC "./*_*.c")
|
|
||||||
target_sources(cmsis-nn PRIVATE ${SRC})
|
|
||||||
@ -1,66 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_concatenation_s8_w.c
|
|
||||||
* Description: s8 version of concatenation along the W axis
|
|
||||||
*
|
|
||||||
* $Date: October 2019
|
|
||||||
* $Revision: V.1.0.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup Concatenation
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
 * s8 version of concatenation along the W axis
 *
 * Refer to header file for details.
 *
 * Copies input_x * input_y * input_z * input_w bytes from input into output,
 * starting offset_w * (input_x * input_y * input_z) bytes into output.
 */
void arm_concatenation_s8_w(const int8_t *input,
                            const uint16_t input_x,
                            const uint16_t input_y,
                            const uint16_t input_z,
                            const uint16_t input_w,
                            int8_t *output,
                            const uint32_t offset_w)
{
    /* Widen before multiplying: uint16_t operands are promoted to signed int,
     * so the un-widened product could overflow int (undefined behaviour). */
    const uint32_t slice_size = (uint32_t)input_x * input_y * input_z;
    const uint32_t input_copy_size = slice_size * input_w;

    output += offset_w * slice_size;

    arm_memcpy_q7(output, input, input_copy_size);
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Concatenation group
|
|
||||||
*/
|
|
||||||
@ -1,75 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_concatenation_s8_x.c
|
|
||||||
* Description: s8 version of concatenation along the X axis
|
|
||||||
*
|
|
||||||
* $Date: October 2019
|
|
||||||
* $Revision: V.1.0.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup Concatenation
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
 * s8 version of concatenation along the X axis
 *
 * Refer to header file for details.
 *
 * Copies input_y * input_z * input_w rows of input_x bytes each. After every
 * row the input pointer advances by input_x and the output pointer by
 * output_x; the first row is written offset_x bytes into output.
 */
void arm_concatenation_s8_x(const int8_t *input,
                            const uint16_t input_x,
                            const uint16_t input_y,
                            const uint16_t input_z,
                            const uint16_t input_w,
                            int8_t *output,
                            const uint16_t output_x,
                            const uint32_t offset_x)
{
    /* Widen before multiplying: uint16_t operands are promoted to signed int,
     * so the un-widened product could overflow int (undefined behaviour). */
    const uint32_t num_iterations = (uint32_t)input_y * input_z * input_w;

    output += offset_x;

    // Copy per row
    for (uint32_t i = 0; i < num_iterations; ++i)
    {
        arm_memcpy_q7(output, input, input_x);
        input += input_x;
        output += output_x;
    }
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Concatenation group
|
|
||||||
*/
|
|
||||||
@ -1,76 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_concatenation_s8_y.c
|
|
||||||
* Description: s8 version of concatenation along the Y axis
|
|
||||||
*
|
|
||||||
* $Date: October 2019
|
|
||||||
* $Revision: V.1.0.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup Concatenation
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
 * s8 version of concatenation along the Y axis
 *
 * Refer to header file for details.
 *
 * Copies input_z * input_w tiles of input_x * input_y bytes each. After every
 * tile the output pointer advances by input_x * output_y; the first tile is
 * written offset_y * input_x bytes into output.
 */
void arm_concatenation_s8_y(const int8_t *input,
                            const uint16_t input_x,
                            const uint16_t input_y,
                            const uint16_t input_z,
                            const uint16_t input_w,
                            int8_t *output,
                            const uint16_t output_y,
                            const uint32_t offset_y)
{
    /* Widen before multiplying: uint16_t operands are promoted to signed int,
     * so the un-widened products could overflow int (undefined behaviour). */
    const uint32_t num_iterations = (uint32_t)input_z * input_w;
    const uint32_t input_copy_size = (uint32_t)input_x * input_y;
    const uint32_t output_stride = (uint32_t)input_x * output_y;

    output += offset_y * input_x;

    // Copy per tile
    for (uint32_t i = 0; i < num_iterations; ++i)
    {
        arm_memcpy_q7(output, input, input_copy_size);
        input += input_copy_size;
        output += output_stride;
    }
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Concatenation group
|
|
||||||
*/
|
|
||||||
@ -1,75 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_concatenation_s8_z.c
|
|
||||||
* Description: s8 version of concatenation along the Z axis
|
|
||||||
*
|
|
||||||
* $Date: October 2019
|
|
||||||
* $Revision: V.1.0.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup Concatenation
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
 * s8 version of concatenation along the Z axis
 *
 * Refer to header file for details.
 *
 * Copies input_w blocks of input_x * input_y * input_z bytes each. After every
 * block the output pointer advances by input_x * input_y * output_z; the first
 * block is written offset_z * (input_x * input_y) bytes into output.
 */
void arm_concatenation_s8_z(const int8_t *input,
                            const uint16_t input_x,
                            const uint16_t input_y,
                            const uint16_t input_z,
                            const uint16_t input_w,
                            int8_t *output,
                            const uint16_t output_z,
                            const uint32_t offset_z)
{
    /* Widen before multiplying: uint16_t operands are promoted to signed int,
     * so the un-widened products could overflow int (undefined behaviour). */
    const uint32_t plane_size = (uint32_t)input_x * input_y;
    const uint32_t input_copy_size = plane_size * input_z;
    const uint32_t output_stride = plane_size * output_z;

    output += offset_z * plane_size;

    for (uint32_t i = 0; i < input_w; ++i)
    {
        arm_memcpy_q7(output, input, input_copy_size);
        input += input_copy_size;
        output += output_stride;
    }
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Concatenation group
|
|
||||||
*/
|
|
||||||
@ -1,24 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2019-2022 Arm Limited.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
# not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Only the s8 and s16 kernels in this directory are added to the cmsis-nn
# target; sources matching neither pattern are not picked up here.
file(GLOB SRC "./*_s8*.c")
file(GLOB SRC_S16 "./*_s16*.c")
target_sources(cmsis-nn PRIVATE ${SRC} ${SRC_S16})
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -1,205 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_1_x_n_s8.c
|
|
||||||
* Description: s8 version of 1xN convolution using symmetric quantization.
|
|
||||||
*
|
|
||||||
* $Date: December 14, 2021
|
|
||||||
* $Revision: V.2.1.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* 1xN s8 convolution function.
|
|
||||||
*
|
|
||||||
* Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_1_x_n_s8(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_conv_params *conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q7_t *input_data,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *filter_data,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int32_t *bias_data,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q7_t *output_data)
|
|
||||||
{
|
|
||||||
(void)bias_dims;
|
|
||||||
arm_status status = ARM_MATH_SUCCESS;
|
|
||||||
if (output_dims->w % 4 != 0)
|
|
||||||
{
|
|
||||||
status = ARM_MATH_SIZE_MISMATCH;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_MVEI)
|
|
||||||
(void)ctx;
|
|
||||||
|
|
||||||
const uint16_t input_x = input_dims->w;
|
|
||||||
const uint16_t kernel_x = filter_dims->w;
|
|
||||||
const uint16_t output_x = output_dims->w;
|
|
||||||
const uint16_t output_ch = output_dims->c;
|
|
||||||
const uint16_t input_ch = input_dims->c;
|
|
||||||
const uint16_t pad_x = conv_params->padding.w;
|
|
||||||
const uint16_t stride_x = conv_params->stride.w;
|
|
||||||
|
|
||||||
const int32_t input_offset = conv_params->input_offset;
|
|
||||||
const int32_t out_offset = conv_params->output_offset;
|
|
||||||
const int32_t out_activation_min = conv_params->activation.min;
|
|
||||||
const int32_t out_activation_max = conv_params->activation.max;
|
|
||||||
int32_t *output_mult = quant_params->multiplier;
|
|
||||||
int32_t *output_shift = quant_params->shift;
|
|
||||||
|
|
||||||
for (int i_out_x = 0; i_out_x <= (output_x - 4); i_out_x += 4)
|
|
||||||
{
|
|
||||||
int32_t input_begin_idx[4];
|
|
||||||
int32_t ker_begin_idx[4];
|
|
||||||
int32_t ker_end_idx[4];
|
|
||||||
|
|
||||||
for (int i = 0; i < 4; i++)
|
|
||||||
{
|
|
||||||
const int32_t est_input_x_idx = stride_x * (i_out_x + i) - pad_x;
|
|
||||||
input_begin_idx[i] = MAX(0, est_input_x_idx);
|
|
||||||
ker_begin_idx[i] = MAX(0, -est_input_x_idx);
|
|
||||||
ker_end_idx[i] = MIN(kernel_x, input_x - est_input_x_idx);
|
|
||||||
}
|
|
||||||
|
|
||||||
if ((ker_begin_idx[0] != 0) || (ker_end_idx[3] != kernel_x))
|
|
||||||
{
|
|
||||||
for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
|
|
||||||
{
|
|
||||||
int32x4_t s_offset;
|
|
||||||
int32_t acc[4];
|
|
||||||
{
|
|
||||||
int32_t sum_row[4];
|
|
||||||
|
|
||||||
(void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[0] - ker_begin_idx[0]) * input_ch,
|
|
||||||
input_data + input_begin_idx[0] * input_ch,
|
|
||||||
filter_data + (input_ch * kernel_x * i_out_ch) +
|
|
||||||
(ker_begin_idx[0] * input_ch),
|
|
||||||
&sum_row[0],
|
|
||||||
&acc[0]);
|
|
||||||
(void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[1] - ker_begin_idx[1]) * input_ch,
|
|
||||||
input_data + input_begin_idx[1] * input_ch,
|
|
||||||
filter_data + (input_ch * kernel_x * i_out_ch) +
|
|
||||||
(ker_begin_idx[1] * input_ch),
|
|
||||||
&sum_row[1],
|
|
||||||
&acc[1]);
|
|
||||||
|
|
||||||
(void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[2] - ker_begin_idx[2]) * input_ch,
|
|
||||||
input_data + input_begin_idx[2] * input_ch,
|
|
||||||
filter_data + (input_ch * kernel_x * i_out_ch) +
|
|
||||||
(ker_begin_idx[2] * input_ch),
|
|
||||||
&sum_row[2],
|
|
||||||
&acc[2]);
|
|
||||||
|
|
||||||
(void)arm_nn_mat_mul_core_1x_s8((ker_end_idx[3] - ker_begin_idx[3]) * input_ch,
|
|
||||||
input_data + input_begin_idx[3] * input_ch,
|
|
||||||
filter_data + (input_ch * kernel_x * i_out_ch) +
|
|
||||||
(ker_begin_idx[3] * input_ch),
|
|
||||||
&sum_row[3],
|
|
||||||
&acc[3]);
|
|
||||||
|
|
||||||
s_offset = vldrwq_s32(sum_row);
|
|
||||||
}
|
|
||||||
int32x4_t res = vldrwq_s32(acc);
|
|
||||||
s_offset = vmulq_n_s32(s_offset, input_offset);
|
|
||||||
res = vaddq_s32(res, s_offset);
|
|
||||||
if (bias_data)
|
|
||||||
{
|
|
||||||
res = vaddq_n_s32(res, bias_data[i_out_ch]);
|
|
||||||
}
|
|
||||||
res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]);
|
|
||||||
res = vaddq_n_s32(res, out_offset);
|
|
||||||
|
|
||||||
res = vmaxq_s32(res, vdupq_n_s32(out_activation_min));
|
|
||||||
res = vminq_s32(res, vdupq_n_s32(out_activation_max));
|
|
||||||
|
|
||||||
const uint32x4_t scatter_offset = {0, output_ch, output_ch * 2, output_ch * 3};
|
|
||||||
vstrbq_scatter_offset_s32(output_data, scatter_offset, res);
|
|
||||||
output_data++;
|
|
||||||
}
|
|
||||||
output_data += (3 * output_ch);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
output_data = arm_nn_mat_mul_core_4x_s8(kernel_x * input_ch,
|
|
||||||
stride_x * input_ch,
|
|
||||||
input_data + input_begin_idx[0] * input_ch,
|
|
||||||
filter_data,
|
|
||||||
output_ch,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
bias_data,
|
|
||||||
output_data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
status = arm_convolve_s8(ctx,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input_data,
|
|
||||||
filter_dims,
|
|
||||||
filter_data,
|
|
||||||
bias_dims,
|
|
||||||
bias_data,
|
|
||||||
output_dims,
|
|
||||||
output_data);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
out:
|
|
||||||
/* Return to application */
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t arm_convolve_1_x_n_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
|
|
||||||
{
|
|
||||||
#if !defined(ARM_MATH_MVEI)
|
|
||||||
return (2 * input_dims->c * filter_dims->w * filter_dims->h) * sizeof(int16_t);
|
|
||||||
#else
|
|
||||||
(void)input_dims;
|
|
||||||
(void)filter_dims;
|
|
||||||
return 0;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,235 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_1x1_HWC_q7_fast_nonsquare.c
|
|
||||||
* Description: Fast Q7 version of 1x1 convolution (non-square shape)
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Fast Q7 version of 1x1 convolution (non-sqaure shape)
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in_x input tensor dimention x
|
|
||||||
* @param[in] dim_im_in_y input tensor dimention y
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel_x filter kernel size x
|
|
||||||
* @param[in] dim_kernel_y filter kernel size y
|
|
||||||
* @param[in] padding_x padding size x
|
|
||||||
* @param[in] padding_y padding size y
|
|
||||||
* @param[in] stride_x convolution stride x
|
|
||||||
* @param[in] stride_y convolution stride y
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out_x output tensor dimension x
|
|
||||||
* @param[in] dim_im_out_y output tensor dimension y
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns either
|
|
||||||
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
|
|
||||||
*
|
|
||||||
* This function is optimized for convolution with 1x1 kernel size (i.e., dim_kernel_x=1
|
|
||||||
* and dim_kernel_y=1). It can be used for the second half of MobileNets [1] after depthwise
|
|
||||||
* separable convolution.
|
|
||||||
*
|
|
||||||
* This function is the version with full list of optimization tricks, but with
|
|
||||||
* some constraints:
|
|
||||||
* ch_im_in is multiple of 4
|
|
||||||
* ch_im_out is multiple of 2
|
|
||||||
*
|
|
||||||
* [1] MobileNets: Efficient Convolutional Neural Networks for Mobile Vision Applications
|
|
||||||
* https://arxiv.org/abs/1704.04861
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_1x1_HWC_q7_fast_nonsquare(const q7_t *Im_in,
|
|
||||||
const uint16_t dim_im_in_x,
|
|
||||||
const uint16_t dim_im_in_y,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q7_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel_x,
|
|
||||||
const uint16_t dim_kernel_y,
|
|
||||||
const uint16_t padding_x,
|
|
||||||
const uint16_t padding_y,
|
|
||||||
const uint16_t stride_x,
|
|
||||||
const uint16_t stride_y,
|
|
||||||
const q7_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q7_t *Im_out,
|
|
||||||
const uint16_t dim_im_out_x,
|
|
||||||
const uint16_t dim_im_out_y,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
(void)dim_im_in_y;
|
|
||||||
int16_t i_out_y, i_out_x;
|
|
||||||
int16_t i_ch_out;
|
|
||||||
|
|
||||||
/* -----------------------
|
|
||||||
* Here we use bufferA as q15_t internally as computation are done with q15_t level
|
|
||||||
* im2col are done to output in q15_t format from q7_t input
|
|
||||||
*/
|
|
||||||
|
|
||||||
q15_t *pBuffer = bufferA;
|
|
||||||
q7_t *pOut = Im_out;
|
|
||||||
|
|
||||||
if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0 || dim_kernel_x != 1 || dim_kernel_y != 1 || padding_x != 0 ||
|
|
||||||
padding_y != 0 || stride_x != 1 || stride_y != 1)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_out_y * dim_im_in_x + i_out_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check if there is left-over for compute */
|
|
||||||
if (pBuffer != bufferA)
|
|
||||||
{
|
|
||||||
const q7_t *pA = wt;
|
|
||||||
for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
|
|
||||||
{
|
|
||||||
q31_t sum = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
const q15_t *pB = bufferA;
|
|
||||||
/* basically each time it process 4 entries */
|
|
||||||
uint16_t colCnt = ch_im_in * dim_kernel_x * dim_kernel_y >> 2;
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
|
|
||||||
q31_t inA1, inA2;
|
|
||||||
q31_t inB1, inB2;
|
|
||||||
|
|
||||||
pA = read_and_pad_reordered(pA, &inA1, &inA2);
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
|
||||||
inB2 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA2, inB2, sum);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
*pOut = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
pOut++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
(void)bufferA;
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
int i, j, k, l, m, n;
|
|
||||||
int conv_out;
|
|
||||||
int in_row, in_col;
|
|
||||||
|
|
||||||
if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0 || dim_kernel_x != 1 || dim_kernel_y != 1 || padding_x != 0 ||
|
|
||||||
padding_y != 0 || stride_x != 1 || stride_y != 1)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
for (j = 0; j < dim_im_out_y; j++)
|
|
||||||
{
|
|
||||||
for (k = 0; k < dim_im_out_x; k++)
|
|
||||||
{
|
|
||||||
conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (m = 0; m < dim_kernel_y; m++)
|
|
||||||
{
|
|
||||||
for (n = 0; n < dim_kernel_x; n++)
|
|
||||||
{
|
|
||||||
// if-for implementation
|
|
||||||
in_row = stride_y * j + m - padding_y;
|
|
||||||
in_col = stride_x * k + n - padding_x;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
|
|
||||||
{
|
|
||||||
for (l = 0; l < ch_im_in; l++)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
|
|
||||||
wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_y + n) * ch_im_in +
|
|
||||||
l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,161 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_1x1_s8_fast.c
|
|
||||||
* Description: Fast q7 version of 1x1 convolution (non-square shape)
|
|
||||||
*
|
|
||||||
* $Date: 12. November 2021
|
|
||||||
* $Revision: V.2.0.4
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M Processors
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
#include <stdio.h>
|
|
||||||
|
|
||||||
#define DIM_KER_X (1U)
|
|
||||||
#define DIM_KER_Y (1U)
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Fast s8 version for 1x1 convolution (non-square shape)
|
|
||||||
*
|
|
||||||
* Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_1x1_s8_fast(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_conv_params *conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q7_t *input_data,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *filter_data,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int32_t *bias_data,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q7_t *output_data)
|
|
||||||
{
|
|
||||||
if (input_dims->c % 4 != 0 || conv_params->padding.w != 0 || conv_params->padding.h != 0 ||
|
|
||||||
conv_params->stride.w != 1 || conv_params->stride.h != 1)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
(void)ctx;
|
|
||||||
(void)filter_dims;
|
|
||||||
(void)bias_dims;
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_MVEI)
|
|
||||||
|
|
||||||
const int32_t col_len = input_dims->w * input_dims->h * input_dims->n;
|
|
||||||
const int32_t output_ch = output_dims->c;
|
|
||||||
const int32_t input_ch = input_dims->c;
|
|
||||||
const int32_t input_offset = conv_params->input_offset;
|
|
||||||
const int32_t out_offset = conv_params->output_offset;
|
|
||||||
const int32_t out_activation_min = conv_params->activation.min;
|
|
||||||
const int32_t out_activation_max = conv_params->activation.max;
|
|
||||||
int32_t *output_mult = quant_params->multiplier;
|
|
||||||
int32_t *output_shift = quant_params->shift;
|
|
||||||
|
|
||||||
for (int i_items = 0; i_items <= (col_len - 4); i_items += 4)
|
|
||||||
{
|
|
||||||
|
|
||||||
output_data = arm_nn_mat_mul_core_4x_s8(input_ch,
|
|
||||||
input_ch,
|
|
||||||
input_data + i_items * input_ch,
|
|
||||||
filter_data,
|
|
||||||
output_ch,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
bias_data,
|
|
||||||
output_data);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Handle left over elements */
|
|
||||||
for (int i_items = (col_len & ~0x3); i_items < col_len; i_items++)
|
|
||||||
{
|
|
||||||
for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
|
|
||||||
{
|
|
||||||
int32_t sum_row = 0;
|
|
||||||
int32_t acc;
|
|
||||||
(void)arm_nn_mat_mul_core_1x_s8(
|
|
||||||
input_ch, input_data + i_items * input_ch, filter_data + i_out_ch * input_ch, &sum_row, &acc);
|
|
||||||
if (bias_data)
|
|
||||||
{
|
|
||||||
acc += bias_data[i_out_ch];
|
|
||||||
}
|
|
||||||
sum_row = (sum_row * input_offset);
|
|
||||||
acc += sum_row;
|
|
||||||
acc = arm_nn_requantize(acc, output_mult[i_out_ch], output_shift[i_out_ch]);
|
|
||||||
acc += out_offset;
|
|
||||||
|
|
||||||
acc = MAX(acc, out_activation_min);
|
|
||||||
acc = MIN(acc, out_activation_max);
|
|
||||||
*output_data++ = acc;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
/* Run the following code as reference implementation for Cortex-M processors with or without DSP extension */
|
|
||||||
|
|
||||||
const int32_t lhs_rows = input_dims->w * input_dims->h * input_dims->n;
|
|
||||||
const int32_t rhs_rows = output_dims->c;
|
|
||||||
const int32_t rhs_cols = input_dims->c;
|
|
||||||
|
|
||||||
arm_nn_mat_mult_nt_t_s8(input_data,
|
|
||||||
filter_data,
|
|
||||||
bias_data,
|
|
||||||
output_data,
|
|
||||||
quant_params->multiplier,
|
|
||||||
quant_params->shift,
|
|
||||||
lhs_rows,
|
|
||||||
rhs_rows,
|
|
||||||
rhs_cols,
|
|
||||||
conv_params->input_offset,
|
|
||||||
conv_params->output_offset,
|
|
||||||
conv_params->activation.min,
|
|
||||||
conv_params->activation.max);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
 * @brief Buffer-size query paired (by name) with arm_convolve_1x1_s8_fast.
 * @param[in] input_dims  Input tensor dimensions. Not read by this implementation.
 * @return Always 0.
 */
int32_t arm_convolve_1x1_s8_fast_get_buffer_size(const cmsis_nn_dims *input_dims)
{
    const int32_t required_size = 0;

    /* The argument is part of the signature only; mark it used to avoid warnings. */
    (void)input_dims;

    return required_size;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,209 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_HWC_q15_basic.c
|
|
||||||
* Description: Q15 version of convolution
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Basic Q15 convolution function
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in input tensor dimention
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel filter kernel size
|
|
||||||
* @param[in] padding padding sizes
|
|
||||||
* @param[in] stride convolution stride
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out output tensor dimension
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns <code>ARM_MATH_SUCCESS</code>
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* <b>Buffer size:</b>
|
|
||||||
*
|
|
||||||
* bufferA size: ch_im_in*dim_kernel*dim_kernel
|
|
||||||
*
|
|
||||||
* bufferB size: 0
|
|
||||||
*
|
|
||||||
* This basic version is designed to work for any input tensor and weight
|
|
||||||
* dimension.
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_HWC_q15_basic(const q15_t *Im_in,
|
|
||||||
const uint16_t dim_im_in,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q15_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel,
|
|
||||||
const uint16_t padding,
|
|
||||||
const uint16_t stride,
|
|
||||||
const q15_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q15_t *Im_out,
|
|
||||||
const uint16_t dim_im_out,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
|
|
||||||
int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
uint16_t im2col_out_pixel_index = 0;
|
|
||||||
q15_t *pBuffer = bufferA;
|
|
||||||
q15_t *pOut = Im_out;
|
|
||||||
q15_t *im_buffer = bufferA;
|
|
||||||
const q15_t *pA;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
|
|
||||||
{
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
|
|
||||||
{
|
|
||||||
/* Filling 0 for out-of-bound paddings */
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer,
|
|
||||||
* ch_im_in); */
|
|
||||||
memcpy(pBuffer,
|
|
||||||
(q15_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in,
|
|
||||||
sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pA = wt;
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
const q15_t *pB = im_buffer;
|
|
||||||
uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 2;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA1 = arm_nn_read_q15x2_ia(&pA);
|
|
||||||
q31_t inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
q31_t inA2 = arm_nn_read_q15x2_ia(&pA);
|
|
||||||
q31_t inB2 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
|
||||||
sum = __SMLAD(inA2, inB2, sum);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q15_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
*pOut = (q15_t)__SSAT((sum >> out_shift), 16);
|
|
||||||
pOut++;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = im_buffer;
|
|
||||||
im2col_out_pixel_index++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
(void)bufferA;
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
int i, j, k, l, m, n;
|
|
||||||
int conv_out;
|
|
||||||
int in_row, in_col;
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
for (j = 0; j < dim_im_out; j++)
|
|
||||||
{
|
|
||||||
for (k = 0; k < dim_im_out; k++)
|
|
||||||
{
|
|
||||||
conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (m = 0; m < dim_kernel; m++)
|
|
||||||
{
|
|
||||||
for (n = 0; n < dim_kernel; n++)
|
|
||||||
{
|
|
||||||
in_row = stride * j + m - padding;
|
|
||||||
in_col = stride * k + n - padding;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
|
|
||||||
{
|
|
||||||
for (l = 0; l < ch_im_in; l++)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] *
|
|
||||||
wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t)__SSAT((conv_out >> out_shift), 16);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,259 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_HWC_q15_fast.c
|
|
||||||
* Description: Fast Q15 version of convolution
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Fast Q15 convolution function
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in input tensor dimention
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel filter kernel size
|
|
||||||
* @param[in] padding padding sizes
|
|
||||||
* @param[in] stride convolution stride
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out output tensor dimension
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns either
|
|
||||||
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* <b>Buffer size:</b>
|
|
||||||
*
|
|
||||||
* bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
|
|
||||||
*
|
|
||||||
* bufferB size: 0
|
|
||||||
*
|
|
||||||
* <b>Input dimension constraints:</b>
|
|
||||||
*
|
|
||||||
* ch_im_in is multiple of 2
|
|
||||||
*
|
|
||||||
* ch_im_out is multiple of 2
|
|
||||||
*
|
|
||||||
* dim_im_out is a multiple of 2
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_HWC_q15_fast(const q15_t *Im_in,
|
|
||||||
const uint16_t dim_im_in,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q15_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel,
|
|
||||||
const uint16_t padding,
|
|
||||||
const uint16_t stride,
|
|
||||||
const q15_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q15_t *Im_out,
|
|
||||||
const uint16_t dim_im_out,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
q15_t *pBuffer = bufferA;
|
|
||||||
q15_t *im_buffer = bufferA;
|
|
||||||
q15_t *pOut = Im_out;
|
|
||||||
|
|
||||||
if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0 || dim_im_out & 0x1)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
|
|
||||||
{
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer,
|
|
||||||
* ch_im_in); */
|
|
||||||
memcpy(pBuffer,
|
|
||||||
(q15_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in,
|
|
||||||
sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i_out_x & 0x1)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
/* initialize the matrix pointers for A */
|
|
||||||
const q15_t *pA = wt;
|
|
||||||
|
|
||||||
/* set up the second output pointers */
|
|
||||||
q15_t *pOut2 = pOut + ch_im_out;
|
|
||||||
|
|
||||||
/* this loop over rows in A */
|
|
||||||
for (i = 0; i < ch_im_out; i += 2)
|
|
||||||
{
|
|
||||||
/* setup pointers for B */
|
|
||||||
const q15_t *pB = im_buffer;
|
|
||||||
const q15_t *pB2 = pB + ch_im_in * dim_kernel * dim_kernel;
|
|
||||||
|
|
||||||
/* aling the second pointer for A */
|
|
||||||
const q15_t *pA2 = pA + ch_im_in * dim_kernel * dim_kernel;
|
|
||||||
|
|
||||||
/* init the sum with bias */
|
|
||||||
q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
|
|
||||||
uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 1;
|
|
||||||
/* accumulate over the vector */
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA1 = arm_nn_read_q15x2_ia(&pA);
|
|
||||||
q31_t inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
q31_t inA2 = arm_nn_read_q15x2_ia(&pA2);
|
|
||||||
q31_t inB2 = arm_nn_read_q15x2_ia(&pB2);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
|
||||||
sum2 = __SMLAD(inA1, inB2, sum2);
|
|
||||||
sum3 = __SMLAD(inA2, inB1, sum3);
|
|
||||||
sum4 = __SMLAD(inA2, inB2, sum4);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
} /* while over colCnt */
|
|
||||||
colCnt = ch_im_in * dim_kernel * dim_kernel & 0x1;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q15_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
q15_t inA2 = *pA2++;
|
|
||||||
q15_t inB2 = *pB2++;
|
|
||||||
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
sum2 += inA1 * inB2;
|
|
||||||
sum3 += inA2 * inB1;
|
|
||||||
sum4 += inA2 * inB2;
|
|
||||||
colCnt--;
|
|
||||||
} /* while over colCnt */
|
|
||||||
*pOut++ = (q15_t)__SSAT(sum >> out_shift, 16);
|
|
||||||
*pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16);
|
|
||||||
*pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16);
|
|
||||||
*pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16);
|
|
||||||
|
|
||||||
/* skip the row computed with A2 */
|
|
||||||
pA += ch_im_in * dim_kernel * dim_kernel;
|
|
||||||
} /* for over ch_im_out */
|
|
||||||
|
|
||||||
pOut += ch_im_out;
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = im_buffer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
(void)bufferA;
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
int i, j, k, l, m, n;
|
|
||||||
int conv_out;
|
|
||||||
int in_row, in_col;
|
|
||||||
|
|
||||||
if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
for (j = 0; j < dim_im_out; j++)
|
|
||||||
{
|
|
||||||
for (k = 0; k < dim_im_out; k++)
|
|
||||||
{
|
|
||||||
conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (m = 0; m < dim_kernel; m++)
|
|
||||||
{
|
|
||||||
for (n = 0; n < dim_kernel; n++)
|
|
||||||
{
|
|
||||||
in_row = stride * j + m - padding;
|
|
||||||
in_col = stride * k + n - padding;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
|
|
||||||
{
|
|
||||||
for (l = 0; l < ch_im_in; l++)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] *
|
|
||||||
wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q15_t)__SSAT((conv_out >> out_shift), 16);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,270 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_HWC_q15_fast_nonsquare.c
|
|
||||||
* Description: Fast Q15 version of convolution
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Fast Q15 convolution function (non-sqaure shape)
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in_x input tensor dimention x
|
|
||||||
* @param[in] dim_im_in_y input tensor dimention y
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel_x filter kernel size x
|
|
||||||
* @param[in] dim_kernel_y filter kernel size y
|
|
||||||
* @param[in] padding_x padding size x
|
|
||||||
* @param[in] padding_y padding size y
|
|
||||||
* @param[in] stride_x convolution stride x
|
|
||||||
* @param[in] stride_y convolution stride y
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out_x output tensor dimension x
|
|
||||||
* @param[in] dim_im_out_y output tensor dimension y
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns either
|
|
||||||
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* <b>Buffer size:</b>
|
|
||||||
*
|
|
||||||
* bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
|
|
||||||
*
|
|
||||||
* bufferB size: 0
|
|
||||||
*
|
|
||||||
* <b>Input dimension constraints:</b>
|
|
||||||
*
|
|
||||||
* ch_im_in is multiple of 2
|
|
||||||
*
|
|
||||||
* ch_im_out is multiple of 2
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_HWC_q15_fast_nonsquare(const q15_t *Im_in,
|
|
||||||
const uint16_t dim_im_in_x,
|
|
||||||
const uint16_t dim_im_in_y,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q15_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel_x,
|
|
||||||
const uint16_t dim_kernel_y,
|
|
||||||
const uint16_t padding_x,
|
|
||||||
const uint16_t padding_y,
|
|
||||||
const uint16_t stride_x,
|
|
||||||
const uint16_t stride_y,
|
|
||||||
const q15_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q15_t *Im_out,
|
|
||||||
const uint16_t dim_im_out_x,
|
|
||||||
const uint16_t dim_im_out_y,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
q15_t *pBuffer = bufferA;
|
|
||||||
q15_t *im_buffer = bufferA;
|
|
||||||
q15_t *pOut = Im_out;
|
|
||||||
|
|
||||||
if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
|
|
||||||
{
|
|
||||||
for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
|
|
||||||
i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
|
|
||||||
i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* arm_copy_q15((q15_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer,
|
|
||||||
* ch_im_in); */
|
|
||||||
memcpy(pBuffer,
|
|
||||||
(q15_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in,
|
|
||||||
sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (i_out_x & 0x1)
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
/* initialize the matrix pointers for A */
|
|
||||||
const q15_t *pA = wt;
|
|
||||||
|
|
||||||
/* set up the second output pointers */
|
|
||||||
q15_t *pOut2 = pOut + ch_im_out;
|
|
||||||
|
|
||||||
/* this loop over rows in A */
|
|
||||||
for (i = 0; i < ch_im_out; i += 2)
|
|
||||||
{
|
|
||||||
/* setup pointers for B */
|
|
||||||
const q15_t *pB = im_buffer;
|
|
||||||
const q15_t *pB2 = pB + ch_im_in * dim_kernel_y * dim_kernel_x;
|
|
||||||
|
|
||||||
/* aling the second pointer for A */
|
|
||||||
const q15_t *pA2 = pA + ch_im_in * dim_kernel_y * dim_kernel_x;
|
|
||||||
|
|
||||||
/* init the sum with bias */
|
|
||||||
q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum2 = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum3 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum4 = ((q31_t)bias[i + 1] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
|
|
||||||
uint16_t colCnt = ch_im_in * dim_kernel_y * dim_kernel_x >> 1;
|
|
||||||
/* accumulate over the vector */
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA1 = arm_nn_read_q15x2_ia(&pA);
|
|
||||||
q31_t inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
q31_t inA2 = arm_nn_read_q15x2_ia(&pA2);
|
|
||||||
q31_t inB2 = arm_nn_read_q15x2_ia(&pB2);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
|
||||||
sum2 = __SMLAD(inA1, inB2, sum2);
|
|
||||||
sum3 = __SMLAD(inA2, inB1, sum3);
|
|
||||||
sum4 = __SMLAD(inA2, inB2, sum4);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
} /* while over colCnt */
|
|
||||||
colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x1;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q15_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
q15_t inA2 = *pA2++;
|
|
||||||
q15_t inB2 = *pB2++;
|
|
||||||
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
sum2 += inA1 * inB2;
|
|
||||||
sum3 += inA2 * inB1;
|
|
||||||
sum4 += inA2 * inB2;
|
|
||||||
colCnt--;
|
|
||||||
} /* while over colCnt */
|
|
||||||
*pOut++ = (q15_t)__SSAT(sum >> out_shift, 16);
|
|
||||||
*pOut++ = (q15_t)__SSAT(sum3 >> out_shift, 16);
|
|
||||||
*pOut2++ = (q15_t)__SSAT(sum2 >> out_shift, 16);
|
|
||||||
*pOut2++ = (q15_t)__SSAT(sum4 >> out_shift, 16);
|
|
||||||
|
|
||||||
/* skip the row computed with A2 */
|
|
||||||
pA += ch_im_in * dim_kernel_y * dim_kernel_x;
|
|
||||||
} /* for over ch_im_out */
|
|
||||||
|
|
||||||
pOut += ch_im_out;
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = im_buffer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
(void)bufferA;
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
int i, j, k, l, m, n;
|
|
||||||
int conv_out;
|
|
||||||
int in_row, in_col;
|
|
||||||
|
|
||||||
if (ch_im_in % 2 != 0 || ch_im_out % 2 != 0)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
for (j = 0; j < dim_im_out_y; j++)
|
|
||||||
{
|
|
||||||
for (k = 0; k < dim_im_out_x; k++)
|
|
||||||
{
|
|
||||||
conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (m = 0; m < dim_kernel_y; m++)
|
|
||||||
{
|
|
||||||
for (n = 0; n < dim_kernel_x; n++)
|
|
||||||
{
|
|
||||||
in_row = stride_y * j + m - padding_y;
|
|
||||||
in_col = stride_x * k + n - padding_x;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
|
|
||||||
{
|
|
||||||
for (l = 0; l < ch_im_in; l++)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
|
|
||||||
wt[i * ch_im_in * dim_kernel_x * dim_kernel_y + (m * dim_kernel_x + n) * ch_im_in +
|
|
||||||
l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q15_t)__SSAT((conv_out >> out_shift), 16);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,280 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_HWC_q7_RGB.c
|
|
||||||
* Description: Q7 version of convolution for RGB image
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Q7 convolution function for RGB image
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in input tensor dimension
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel filter kernel size
|
|
||||||
* @param[in] padding padding sizes
|
|
||||||
* @param[in] stride convolution stride
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out output tensor dimension
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns either
|
|
||||||
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* <b>Buffer size:</b>
|
|
||||||
*
|
|
||||||
* bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
|
|
||||||
*
|
|
||||||
* bufferB size: 0
|
|
||||||
*
|
|
||||||
* <b>Input dimension constraints:</b>
|
|
||||||
*
|
|
||||||
* ch_im_in equals 3
|
|
||||||
*
|
|
||||||
* This kernel is written exclusively for convolution with ch_im_in
|
|
||||||
* equals 3. This applies on the first layer of CNNs which has input
|
|
||||||
* image with RGB format.
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_HWC_q7_RGB(const q7_t *Im_in,
|
|
||||||
const uint16_t dim_im_in,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q7_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel,
|
|
||||||
const uint16_t padding,
|
|
||||||
const uint16_t stride,
|
|
||||||
const q7_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q7_t *Im_out,
|
|
||||||
const uint16_t dim_im_out,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Here we use bufferA as q15_t internally as computation are done with q15_t level
|
|
||||||
* im2col are done to output in q15_t format from q7_t input
|
|
||||||
*/
|
|
||||||
q15_t *pBuffer = bufferA;
|
|
||||||
q7_t *pOut = Im_out;
|
|
||||||
|
|
||||||
// check if number of input channels is 3
|
|
||||||
if (ch_im_in != 3)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
// This part implements the im2col function
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
|
|
||||||
{
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
|
|
||||||
{
|
|
||||||
/* Equivalent to arm_fill_q15(0, pBuffer, ch_im_in) with assumption: ch_im_in = 3 */
|
|
||||||
arm_memset_q7((q7_t *)pBuffer, (q7_t)0, 3 * sizeof(q15_t));
|
|
||||||
pBuffer += 3;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Equivalent to:
|
|
||||||
* arm_q7_to_q15_no_shift( (q7_t*)Im_in+(i_ker_y*dim_im_in+i_ker_x)*3, pBuffer, 3);
|
|
||||||
*/
|
|
||||||
|
|
||||||
const q7_t *pPixel = Im_in + (i_ker_y * dim_im_in + i_ker_x) * 3;
|
|
||||||
q31_t buf = arm_nn_read_q7x4(pPixel);
|
|
||||||
|
|
||||||
union arm_nnword top;
|
|
||||||
union arm_nnword bottom;
|
|
||||||
|
|
||||||
top.word = __SXTB16(buf);
|
|
||||||
bottom.word = __SXTB16(__ROR(buf, 8));
|
|
||||||
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
|
||||||
/*
|
|
||||||
* little-endian, | omit | 3rd | 2nd | 1st |
|
|
||||||
* MSB LSB
|
|
||||||
* top | 3rd | 1st |; bottom | omit | 2nd |
|
|
||||||
*
|
|
||||||
* version 1, need to swap 2nd and 3rd weight
|
|
||||||
* *__SIMD32(pBuffer) = top.word;
|
|
||||||
* *(pBuffer+2) = bottom.half_words[0];
|
|
||||||
*
|
|
||||||
* version 2, no weight shuffling required
|
|
||||||
*/
|
|
||||||
*pBuffer++ = top.half_words[0];
|
|
||||||
int32_t packed_word = __PKHBT(bottom.word, top.word, 0);
|
|
||||||
arm_memcpy_q7((q7_t *)pBuffer, (q7_t *)&packed_word, 4);
|
|
||||||
#else
|
|
||||||
/*
|
|
||||||
* big-endian, | 1st | 2nd | 3rd | omit |
|
|
||||||
* MSB LSB
|
|
||||||
* top | 2nd | omit |; bottom | 1st | 3rd |
|
|
||||||
*
|
|
||||||
* version 1, need to swap 2nd and 3rd weight
|
|
||||||
* *__SIMD32(pBuffer) = bottom.word;
|
|
||||||
* *(pBuffer+2) = top.half_words[1];
|
|
||||||
*
|
|
||||||
* version 2, no weight shuffling required
|
|
||||||
*/
|
|
||||||
*pBuffer++ = bottom.half_words[0];
|
|
||||||
int32_t packed_word = __PKHTB(top.word, bottom.word, 0);
|
|
||||||
arm_memcpy_q7((q7_t *)pBuffer, (q7_t *)&packed_word, 4);
|
|
||||||
#endif
|
|
||||||
pBuffer += 2;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * 3 * dim_kernel * dim_kernel)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15(
|
|
||||||
wt, bufferA, ch_im_out, 3 * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
|
|
||||||
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* left-over because odd number of output pixels */
|
|
||||||
if (pBuffer != bufferA)
|
|
||||||
{
|
|
||||||
const q7_t *pA = wt;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q15_t *pB = bufferA;
|
|
||||||
/* basically each time it process 4 entries */
|
|
||||||
uint16_t colCnt = 3 * dim_kernel * dim_kernel >> 2;
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
|
|
||||||
q31_t inA1, inA2;
|
|
||||||
q31_t inB1, inB2;
|
|
||||||
|
|
||||||
pA = read_and_pad(pA, &inA1, &inA2);
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q15x2_ia((const q15_t **)&pB);
|
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
|
||||||
inB2 = arm_nn_read_q15x2_ia((const q15_t **)&pB);
|
|
||||||
sum = __SMLAD(inA2, inB2, sum);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
colCnt = 3 * dim_kernel * dim_kernel & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
(void)bufferA;
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
int i, j, k, l, m, n;
|
|
||||||
int conv_out;
|
|
||||||
int in_row, in_col;
|
|
||||||
|
|
||||||
// check if number of input channels is 3
|
|
||||||
if (ch_im_in != 3)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
for (j = 0; j < dim_im_out; j++)
|
|
||||||
{
|
|
||||||
for (k = 0; k < dim_im_out; k++)
|
|
||||||
{
|
|
||||||
conv_out = (bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (m = 0; m < dim_kernel; m++)
|
|
||||||
{
|
|
||||||
for (n = 0; n < dim_kernel; n++)
|
|
||||||
{
|
|
||||||
/* if-for implementation */
|
|
||||||
in_row = stride * j + m - padding;
|
|
||||||
in_col = stride * k + n - padding;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
|
|
||||||
{
|
|
||||||
for (l = 0; l < ch_im_in; l++)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] *
|
|
||||||
wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return (ARM_MATH_SUCCESS);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,227 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_HWC_q7_basic.c
|
|
||||||
* Description: Q7 version of convolution
|
|
||||||
*
|
|
||||||
* $Date: 20. July 2021
|
|
||||||
* $Revision: V.1.1.1
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Basic Q7 convolution function
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in input tensor dimention
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel filter kernel size
|
|
||||||
* @param[in] padding padding sizes
|
|
||||||
* @param[in] stride convolution stride
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out output tensor dimension
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns <code>ARM_MATH_SUCCESS</code>
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* <b>Buffer size:</b>
|
|
||||||
*
|
|
||||||
* bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
|
|
||||||
*
|
|
||||||
* bufferB size: 0
|
|
||||||
*
|
|
||||||
* This basic version is designed to work for any input tensor and weight
|
|
||||||
* dimension.
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_HWC_q7_basic(const q7_t *Im_in,
|
|
||||||
const uint16_t dim_im_in,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q7_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel,
|
|
||||||
const uint16_t padding,
|
|
||||||
const uint16_t stride,
|
|
||||||
const q7_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q7_t *Im_out,
|
|
||||||
const uint16_t dim_im_out,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
|
|
||||||
int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Here we use bufferA as q15_t internally as computation are done with q15_t level
|
|
||||||
* im2col are done to output in q15_t format from q7_t input
|
|
||||||
*/
|
|
||||||
q15_t *pBuffer = bufferA;
|
|
||||||
q7_t *pOut = Im_out;
|
|
||||||
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
|
|
||||||
{
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
|
|
||||||
{
|
|
||||||
/* Filling 0 for out-of-bound paddings */
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Copying the pixel data to column */
|
|
||||||
arm_q7_to_q15_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Computation is filed for every 2 columns */
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
|
|
||||||
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* left-over because odd number of output pixels */
|
|
||||||
if (pBuffer != bufferA)
|
|
||||||
{
|
|
||||||
const q7_t *pA = wt;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
/* Load the accumulator with bias first */
|
|
||||||
q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
|
|
||||||
/* Point to the beging of the im2col buffer */
|
|
||||||
const q15_t *pB = bufferA;
|
|
||||||
|
|
||||||
/* Each time it process 4 entries */
|
|
||||||
uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 2;
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA1, inA2;
|
|
||||||
q31_t inB1, inB2;
|
|
||||||
|
|
||||||
pA = read_and_pad(pA, &inA1, &inA2);
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
|
||||||
inB2 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA2, inB2, sum);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
(void)bufferA;
|
|
||||||
int i, j, k, l, m, n;
|
|
||||||
int conv_out;
|
|
||||||
int in_row, in_col;
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
for (j = 0; j < dim_im_out; j++)
|
|
||||||
{
|
|
||||||
for (k = 0; k < dim_im_out; k++)
|
|
||||||
{
|
|
||||||
conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (m = 0; m < dim_kernel; m++)
|
|
||||||
{
|
|
||||||
for (n = 0; n < dim_kernel; n++)
|
|
||||||
{
|
|
||||||
// if-for implementation
|
|
||||||
in_row = stride * j + m - padding;
|
|
||||||
in_col = stride * k + n - padding;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
|
|
||||||
{
|
|
||||||
for (l = 0; l < ch_im_in; l++)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] *
|
|
||||||
wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,229 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_HWC_q7_basic_nonsquare.c
|
|
||||||
* Description: Q7 version of convolution
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Basic Q7 convolution function (non-sqaure shape)
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in_x input tensor dimention x
|
|
||||||
* @param[in] dim_im_in_y input tensor dimention y
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel_x filter kernel size x
|
|
||||||
* @param[in] dim_kernel_y filter kernel size y
|
|
||||||
* @param[in] padding_x padding size x
|
|
||||||
* @param[in] padding_y padding size y
|
|
||||||
* @param[in] stride_x convolution stride x
|
|
||||||
* @param[in] stride_y convolution stride y
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out_x output tensor dimension x
|
|
||||||
* @param[in] dim_im_out_y output tensor dimension y
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns <code>ARM_MATH_SUCCESS</code>
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_HWC_q7_basic_nonsquare(const q7_t *Im_in,
|
|
||||||
const uint16_t dim_im_in_x,
|
|
||||||
const uint16_t dim_im_in_y,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q7_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel_x,
|
|
||||||
const uint16_t dim_kernel_y,
|
|
||||||
const uint16_t padding_x,
|
|
||||||
const uint16_t padding_y,
|
|
||||||
const uint16_t stride_x,
|
|
||||||
const uint16_t stride_y,
|
|
||||||
const q7_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q7_t *Im_out,
|
|
||||||
const uint16_t dim_im_out_x,
|
|
||||||
const uint16_t dim_im_out_y,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
|
|
||||||
int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Here we use bufferA as q15_t internally as computation are done with q15_t level
|
|
||||||
* im2col are done to output in q15_t format from q7_t input
|
|
||||||
*/
|
|
||||||
q15_t *pBuffer = bufferA;
|
|
||||||
q7_t *pOut = Im_out;
|
|
||||||
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
|
|
||||||
{
|
|
||||||
for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
|
|
||||||
i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
|
|
||||||
i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
|
|
||||||
{
|
|
||||||
/* Filling 0 for out-of-bound paddings */
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Copying the pixel data to column */
|
|
||||||
arm_q7_to_q15_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Computation is filed for every 2 columns */
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_y * dim_kernel_x)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel_y * dim_kernel_x, bias_shift, out_shift, bias, pOut);
|
|
||||||
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* left-over because odd number of output pixels */
|
|
||||||
if (pBuffer != bufferA)
|
|
||||||
{
|
|
||||||
const q7_t *pA = wt;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
/* Load the accumulator with bias first */
|
|
||||||
q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
|
|
||||||
/* Point to the beging of the im2col buffer */
|
|
||||||
const q15_t *pB = bufferA;
|
|
||||||
|
|
||||||
/* Each time it process 4 entries */
|
|
||||||
uint16_t colCnt = ch_im_in * dim_kernel_y * dim_kernel_x >> 2;
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA1, inA2;
|
|
||||||
q31_t inB1, inB2;
|
|
||||||
|
|
||||||
pA = read_and_pad(pA, &inA1, &inA2);
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
|
||||||
inB2 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA2, inB2, sum);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
colCnt = ch_im_in * dim_kernel_y * dim_kernel_x & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
(void)bufferA;
|
|
||||||
int i, j, k, l, m, n;
|
|
||||||
int conv_out;
|
|
||||||
int in_row, in_col;
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
for (j = 0; j < dim_im_out_y; j++)
|
|
||||||
{
|
|
||||||
for (k = 0; k < dim_im_out_x; k++)
|
|
||||||
{
|
|
||||||
conv_out = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (m = 0; m < dim_kernel_y; m++)
|
|
||||||
{
|
|
||||||
for (n = 0; n < dim_kernel_x; n++)
|
|
||||||
{
|
|
||||||
// if-for implementation
|
|
||||||
in_row = stride_y * j + m - padding_y;
|
|
||||||
in_col = stride_x * k + n - padding_x;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
|
|
||||||
{
|
|
||||||
for (l = 0; l < ch_im_in; l++)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
|
|
||||||
wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in +
|
|
||||||
l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,380 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_HWC_q7_fast.c
|
|
||||||
* Description: Fast Q7 version of convolution
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Fast Q7 convolution function
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in input tensor dimension
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel filter kernel size
|
|
||||||
* @param[in] padding padding sizes
|
|
||||||
* @param[in] stride convolution stride
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out output tensor dimension
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns either
|
|
||||||
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* <b>Buffer size:</b>
|
|
||||||
*
|
|
||||||
* bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
|
|
||||||
*
|
|
||||||
* bufferB size: 0
|
|
||||||
*
|
|
||||||
* <b>Input dimension constraints:</b>
|
|
||||||
*
|
|
||||||
* ch_im_in is multiple of 4 ( because of the SIMD32 read and swap )
|
|
||||||
*
|
|
||||||
* ch_im_out is multiple of 2 ( because 2x2 mat_mult kernel )
|
|
||||||
*
|
|
||||||
* The im2col converts the Q7 tensor input into Q15 column, which is stored in
|
|
||||||
* bufferA. There is reordering happening during this im2col process with
|
|
||||||
* arm_q7_to_q15_reordered_no_shift. For every four elements, the second and
|
|
||||||
* third elements are swapped.
|
|
||||||
*
|
|
||||||
* The computation kernel arm_nn_mat_mult_kernel_q7_q15_reordered does the
|
|
||||||
* GEMM computation with the reordered columns.
|
|
||||||
*
|
|
||||||
* To speed-up the determination of the padding condition, we split the
|
|
||||||
* computation into 3x3 parts, i.e., {top, mid, bottom} X {left, mid, right}.
|
|
||||||
* This reduces the total number of boundary condition checks and improves
|
|
||||||
* the data copying performance.
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_HWC_q7_fast(const q7_t *Im_in,
|
|
||||||
const uint16_t dim_im_in,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q7_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel,
|
|
||||||
const uint16_t padding,
|
|
||||||
const uint16_t stride,
|
|
||||||
const q7_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q7_t *Im_out,
|
|
||||||
const uint16_t dim_im_out,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
|
|
||||||
int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Here we use bufferA as q15_t internally as computation are done with q15_t level
|
|
||||||
* im2col are done to output in q15_t format from q7_t input
|
|
||||||
*/
|
|
||||||
|
|
||||||
q15_t *pBuffer = bufferA;
|
|
||||||
q7_t *pOut = Im_out;
|
|
||||||
|
|
||||||
if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Here we split the entire matrix into three regions depending on the padding situation
|
|
||||||
* Top: i_out_y from 0 to padding - 1
|
|
||||||
* Middle: i_out_y from padding to dim_im_out-padding-1
|
|
||||||
* Bottom: i_out_y from dim_im_out-padding to dim_im_out-1
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* top part */
|
|
||||||
for (i_out_y = 0; i_out_y < padding; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* middle part, here we also divide the x into left, mid and right */
|
|
||||||
for (; i_out_y < dim_im_out - padding; i_out_y++)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* left part */
|
|
||||||
for (i_out_x = 0; i_out_x < padding; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_x < 0 || i_ker_x >= dim_im_in)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* mid part */
|
|
||||||
for (; i_out_x < dim_im_out - padding; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift((q7_t *)Im_in +
|
|
||||||
(i_ker_y * dim_im_in + i_out_x * stride - padding) * ch_im_in,
|
|
||||||
pBuffer,
|
|
||||||
ch_im_in * dim_kernel);
|
|
||||||
pBuffer += ch_im_in * dim_kernel;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* right part */
|
|
||||||
for (; i_out_x < dim_im_out; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_x < 0 || i_ker_x >= dim_im_in)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (; i_out_y < dim_im_out; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel * dim_kernel)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel * dim_kernel, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check if there is left-over for compute */
|
|
||||||
if (pBuffer != bufferA)
|
|
||||||
{
|
|
||||||
const q7_t *pA = wt;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
q31_t sum = ((q31_t)bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
const q15_t *pB = bufferA;
|
|
||||||
/* each time it process 4 entries */
|
|
||||||
uint16_t colCnt = ch_im_in * dim_kernel * dim_kernel >> 2;
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
|
|
||||||
q31_t inA1, inA2;
|
|
||||||
q31_t inB1, inB2;
|
|
||||||
|
|
||||||
pA = read_and_pad_reordered(pA, &inA1, &inA2);
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
|
||||||
inB2 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
sum = __SMLAD(inA2, inB2, sum);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
colCnt = ch_im_in * dim_kernel * dim_kernel & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
*pOut = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
pOut++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
(void)bufferA;
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
int i, j, k, l, m, n;
|
|
||||||
int conv_out;
|
|
||||||
int in_row, in_col;
|
|
||||||
|
|
||||||
if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
for (j = 0; j < dim_im_out; j++)
|
|
||||||
{
|
|
||||||
for (k = 0; k < dim_im_out; k++)
|
|
||||||
{
|
|
||||||
conv_out = (bias[i] << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (m = 0; m < dim_kernel; m++)
|
|
||||||
{
|
|
||||||
for (n = 0; n < dim_kernel; n++)
|
|
||||||
{
|
|
||||||
// if-for implementation
|
|
||||||
in_row = stride * j + m - padding;
|
|
||||||
in_col = stride * k + n - padding;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
|
|
||||||
{
|
|
||||||
for (l = 0; l < ch_im_in; l++)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + l] *
|
|
||||||
wt[i * ch_im_in * dim_kernel * dim_kernel + (m * dim_kernel + n) * ch_im_in + l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[i + (j * dim_im_out + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,378 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_HWC_q7_fast_nonsquare.c
|
|
||||||
* Description: Fast Q7 version of convolution (non-square shape)
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Fast Q7 convolution function (non-square shape)
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in_x input tensor dimension x
|
|
||||||
* @param[in] dim_im_in_y input tensor dimension y
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel_x filter kernel size x
|
|
||||||
* @param[in] dim_kernel_y filter kernel size y
|
|
||||||
* @param[in] padding_x padding size x
|
|
||||||
* @param[in] padding_y padding size y
|
|
||||||
* @param[in] stride_x convolution stride x
|
|
||||||
* @param[in] stride_y convolution stride y
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out_x output tensor dimension x
|
|
||||||
* @param[in] dim_im_out_y output tensor dimension y
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns either
|
|
||||||
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
|
|
||||||
*
|
|
||||||
* This function is the version with full list of optimization tricks, but with
|
|
||||||
* some constraints:
|
|
||||||
* ch_im_in is multiple of 4
|
|
||||||
* ch_im_out is multiple of 2
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_HWC_q7_fast_nonsquare(const q7_t *Im_in,
|
|
||||||
const uint16_t dim_im_in_x,
|
|
||||||
const uint16_t dim_im_in_y,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q7_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel_x,
|
|
||||||
const uint16_t dim_kernel_y,
|
|
||||||
const uint16_t padding_x,
|
|
||||||
const uint16_t padding_y,
|
|
||||||
const uint16_t stride_x,
|
|
||||||
const uint16_t stride_y,
|
|
||||||
const q7_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q7_t *Im_out,
|
|
||||||
const uint16_t dim_im_out_x,
|
|
||||||
const uint16_t dim_im_out_y,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
|
|
||||||
int16_t i_out_y, i_out_x, i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
/* -----------------------
|
|
||||||
* Here we use bufferA as q15_t internally as computation are done with q15_t level
|
|
||||||
* im2col are done to output in q15_t format from q7_t input
|
|
||||||
*/
|
|
||||||
|
|
||||||
q15_t *pBuffer = bufferA;
|
|
||||||
q7_t *pOut = Im_out;
|
|
||||||
|
|
||||||
if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Here we split the entire matrix into three regions depending on the padding situation
|
|
||||||
* Top: i_out_y from 0 to padding - 1
|
|
||||||
* Middle: i_out_y from padding to dim_im_out-padding-1
|
|
||||||
* Bottom: i_out_y from dim_im_out-padding to dim_im_out-1
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* top part */
|
|
||||||
for (i_out_y = 0; i_out_y < padding_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
|
|
||||||
i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
|
|
||||||
i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* middle part, here we also divide the x into left, mid and right */
|
|
||||||
for (; i_out_y < dim_im_out_y - padding_y; i_out_y++)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* left part */
|
|
||||||
for (i_out_x = 0; i_out_x < padding_x; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
|
|
||||||
i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
|
|
||||||
i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_x < 0 || i_ker_x >= dim_im_in_x)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* mid part */
|
|
||||||
for (; i_out_x < dim_im_out_x - padding_x; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
|
|
||||||
i_ker_y++)
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_out_x * stride_x - padding_x) * ch_im_in,
|
|
||||||
pBuffer,
|
|
||||||
ch_im_in * dim_kernel_x);
|
|
||||||
pBuffer += ch_im_in * dim_kernel_x;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* right part */
|
|
||||||
for (; i_out_x < dim_im_out_x; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
|
|
||||||
i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
|
|
||||||
i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_x < 0 || i_ker_x >= dim_im_in_x)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (; i_out_y < dim_im_out_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
|
|
||||||
{
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
|
|
||||||
i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
|
|
||||||
i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
|
|
||||||
{
|
|
||||||
/* arm_fill_q15(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, sizeof(q15_t) * ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_reordered_no_shift(
|
|
||||||
(q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (pBuffer == bufferA + 2 * ch_im_in * dim_kernel_x * dim_kernel_y)
|
|
||||||
{
|
|
||||||
pOut = arm_nn_mat_mult_kernel_q7_q15_reordered(
|
|
||||||
wt, bufferA, ch_im_out, ch_im_in * dim_kernel_x * dim_kernel_y, bias_shift, out_shift, bias, pOut);
|
|
||||||
/* counter reset */
|
|
||||||
pBuffer = bufferA;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* check if there is left-over for compute */
|
|
||||||
if (pBuffer != bufferA)
|
|
||||||
{
|
|
||||||
const q7_t *pA = wt;
|
|
||||||
int i;
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
q31_t sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
const q15_t *pB = bufferA;
|
|
||||||
/* basically each time it process 4 entries */
|
|
||||||
uint16_t colCnt = ch_im_in * dim_kernel_x * dim_kernel_y >> 2;
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
|
|
||||||
q31_t inA1, inA2;
|
|
||||||
q31_t inB1, inB2;
|
|
||||||
|
|
||||||
pA = read_and_pad_reordered(pA, &inA1, &inA2);
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
|
||||||
inB2 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
sum = __SMLAD(inA2, inB2, sum);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
colCnt = (ch_im_in * dim_kernel_y * dim_kernel_x) & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
*pOut = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
pOut++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
(void)bufferA;
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
int i, j, k, l, m, n;
|
|
||||||
int conv_out;
|
|
||||||
int in_row, in_col;
|
|
||||||
|
|
||||||
if (ch_im_in % 4 != 0 || ch_im_out % 2 != 0)
|
|
||||||
{
|
|
||||||
/* check if the input dimension meets the constraints */
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i = 0; i < ch_im_out; i++)
|
|
||||||
{
|
|
||||||
for (j = 0; j < dim_im_out_y; j++)
|
|
||||||
{
|
|
||||||
for (k = 0; k < dim_im_out_x; k++)
|
|
||||||
{
|
|
||||||
conv_out = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (m = 0; m < dim_kernel_y; m++)
|
|
||||||
{
|
|
||||||
for (n = 0; n < dim_kernel_x; n++)
|
|
||||||
{
|
|
||||||
/* if-for implementation */
|
|
||||||
in_row = stride_y * j + m - padding_y;
|
|
||||||
in_col = stride_x * k + n - padding_x;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
|
|
||||||
{
|
|
||||||
for (l = 0; l < ch_im_in; l++)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + l] *
|
|
||||||
wt[i * ch_im_in * dim_kernel_y * dim_kernel_x + (m * dim_kernel_x + n) * ch_im_in +
|
|
||||||
l];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[i + (j * dim_im_out_x + k) * ch_im_out] = (q7_t)__SSAT((conv_out >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,241 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_fast_s16.c
|
|
||||||
* Description: Optimized s16 version of convolution.
|
|
||||||
*
|
|
||||||
* $Date: 12 August 2021
|
|
||||||
* $Revision: V.1.1.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Optimized s16 convolution function.
|
|
||||||
*
|
|
||||||
* Refer header file for details. Optimal use case for the DSP/MVE implementation is when input and output channels
|
|
||||||
* are multiples of 4 or at least greater than 4.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_fast_s16(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_conv_params *conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q15_t *input_data,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *filter_data,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int64_t *bias_data,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q15_t *output_data)
|
|
||||||
{
|
|
||||||
(void)bias_dims;
|
|
||||||
if (filter_dims->w * filter_dims->h * input_dims->c >= 512)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ctx->buf == NULL && arm_convolve_s8_get_buffer_size(input_dims, filter_dims) > 0)
|
|
||||||
{
|
|
||||||
return ARM_MATH_ARGUMENT_ERROR;
|
|
||||||
}
|
|
||||||
q15_t *buffer_a = (q15_t *)ctx->buf;
|
|
||||||
|
|
||||||
const int32_t input_batches = input_dims->n;
|
|
||||||
const int32_t input_x = input_dims->w;
|
|
||||||
const int32_t input_y = input_dims->h;
|
|
||||||
const int32_t input_ch = input_dims->c;
|
|
||||||
const int32_t kernel_x = filter_dims->w;
|
|
||||||
const int32_t kernel_y = filter_dims->h;
|
|
||||||
const int32_t output_x = output_dims->w;
|
|
||||||
const int32_t output_y = output_dims->h;
|
|
||||||
const int32_t output_ch = output_dims->c;
|
|
||||||
|
|
||||||
const int32_t pad_x = conv_params->padding.w;
|
|
||||||
const int32_t pad_y = conv_params->padding.h;
|
|
||||||
const int32_t stride_x = conv_params->stride.w;
|
|
||||||
const int32_t stride_y = conv_params->stride.h;
|
|
||||||
|
|
||||||
const int16_t out_activation_min = conv_params->activation.min;
|
|
||||||
const int16_t out_activation_max = conv_params->activation.max;
|
|
||||||
int32_t *output_mult = quant_params->multiplier;
|
|
||||||
int32_t *output_shift = quant_params->shift;
|
|
||||||
|
|
||||||
for (int i_batch = 0; i_batch < input_batches; i_batch++)
|
|
||||||
{
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Generate two columns from the input tensor a GEMM computation */
|
|
||||||
q15_t *two_column_buf = buffer_a;
|
|
||||||
q15_t *out = output_data;
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (int32_t i_out_y = 0; i_out_y < output_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (int32_t i_out_x = 0; i_out_x < output_x; i_out_x++)
|
|
||||||
{
|
|
||||||
for (int32_t i_ker_y = i_out_y * stride_y - pad_y; i_ker_y < i_out_y * stride_y - pad_y + kernel_y;
|
|
||||||
i_ker_y++)
|
|
||||||
{
|
|
||||||
for (int32_t i_ker_x = i_out_x * stride_x - pad_x; i_ker_x < i_out_x * stride_x - pad_x + kernel_x;
|
|
||||||
i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x)
|
|
||||||
{
|
|
||||||
/* Filling 0 for out-of-bound paddings */
|
|
||||||
arm_memset_q7((q7_t *)two_column_buf, 0, sizeof(q15_t) * input_ch);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_memcpy_q7((q7_t *)two_column_buf,
|
|
||||||
(const q7_t *)(input_data + (i_ker_y * input_x + i_ker_x) * input_ch),
|
|
||||||
input_ch * sizeof(q15_t));
|
|
||||||
}
|
|
||||||
two_column_buf += input_ch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Computation is filed for every 2 columns */
|
|
||||||
if (two_column_buf == buffer_a + 2 * input_ch * kernel_y * kernel_x)
|
|
||||||
{
|
|
||||||
out = arm_nn_mat_mult_kernel_s16(filter_data,
|
|
||||||
buffer_a,
|
|
||||||
output_ch,
|
|
||||||
output_shift,
|
|
||||||
output_mult,
|
|
||||||
out_activation_min,
|
|
||||||
out_activation_max,
|
|
||||||
(input_ch * kernel_y * kernel_x),
|
|
||||||
bias_data,
|
|
||||||
out);
|
|
||||||
|
|
||||||
/* Counter reset */
|
|
||||||
two_column_buf = buffer_a;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Left-over because odd number of output pixels */
|
|
||||||
if (two_column_buf != buffer_a)
|
|
||||||
{
|
|
||||||
const q7_t *ker_a = filter_data;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < output_ch; i++)
|
|
||||||
{
|
|
||||||
/* Init the accumulator*/
|
|
||||||
q31_t sum = 0;
|
|
||||||
|
|
||||||
/* Point to the beginning of the im2col buffer where the input is available as a rearranged column */
|
|
||||||
const q15_t *ip_as_col = buffer_a;
|
|
||||||
|
|
||||||
/* 4 multiply and accumulates are done in one loop. */
|
|
||||||
uint16_t col_count = (input_ch * kernel_y * kernel_x) >> 2;
|
|
||||||
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q31_t ker_a1, ker_a2;
|
|
||||||
q31_t ip_b1, ip_b2;
|
|
||||||
|
|
||||||
ker_a = read_and_pad(ker_a, &ker_a1, &ker_a2);
|
|
||||||
|
|
||||||
ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col);
|
|
||||||
sum = __SMLAD(ker_a1, ip_b1, sum);
|
|
||||||
ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col);
|
|
||||||
sum = __SMLAD(ker_a2, ip_b2, sum);
|
|
||||||
|
|
||||||
col_count--;
|
|
||||||
}
|
|
||||||
/* Handle left over mac */
|
|
||||||
col_count = input_ch * kernel_y * kernel_x & 0x3;
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q7_t ker_a1 = *ker_a++;
|
|
||||||
q15_t ip_b1 = *ip_as_col++;
|
|
||||||
sum += ker_a1 * ip_b1;
|
|
||||||
col_count--;
|
|
||||||
}
|
|
||||||
if (bias_data)
|
|
||||||
{
|
|
||||||
q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[i]);
|
|
||||||
q63_t acc_64 = sum + bias_data[i];
|
|
||||||
sum = arm_nn_requantize_s64(acc_64, reduced_multiplier, output_shift[i]);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]);
|
|
||||||
}
|
|
||||||
sum = MAX(sum, out_activation_min);
|
|
||||||
sum = MIN(sum, out_activation_max);
|
|
||||||
*out++ = (q15_t)sum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
(void)input_data;
|
|
||||||
(void)output_data;
|
|
||||||
(void)bias_data;
|
|
||||||
(void)filter_data;
|
|
||||||
(void)buffer_a;
|
|
||||||
(void)kernel_x;
|
|
||||||
(void)kernel_y;
|
|
||||||
(void)pad_x;
|
|
||||||
(void)pad_y;
|
|
||||||
(void)stride_x;
|
|
||||||
(void)stride_y;
|
|
||||||
(void)out_activation_min;
|
|
||||||
(void)out_activation_max;
|
|
||||||
(void)output_mult;
|
|
||||||
(void)output_shift;
|
|
||||||
return ARM_MATH_ARGUMENT_ERROR;
|
|
||||||
#endif
|
|
||||||
/* Advance to the next batch */
|
|
||||||
input_data += (input_x * input_y * input_ch);
|
|
||||||
output_data += (output_x * output_y * output_ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t arm_convolve_fast_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
{
    /* Scratch size in bytes; stays 0 when the DSP-only implementation is not compiled in. */
    int32_t required_bytes = 0;

#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
    /* Two columns of (input channels * kernel width * kernel height) int16_t entries. */
    required_bytes = (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t);
#else
    (void)filter_dims;
    (void)input_dims;
#endif

    return required_bytes;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,156 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2022 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_s16.c
|
|
||||||
* Description: s16 version of convolution using symmetric quantization.
|
|
||||||
*
|
|
||||||
* $Date: January 13, 2022
|
|
||||||
* $Revision: V.1.1.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Basic s16 convolution function.
|
|
||||||
*
|
|
||||||
* Refer header file for details. Optimal use case for the DSP/MVE implementation is when input and output channels
|
|
||||||
* are multiples of 4 or at least greater than 4.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_s16(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_conv_params *conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q15_t *input_data,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *filter_data,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int64_t *bias_data,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q15_t *output_data)
|
|
||||||
{
|
|
||||||
(void)bias_dims;
|
|
||||||
(void)ctx;
|
|
||||||
|
|
||||||
const int32_t input_batches = input_dims->n;
|
|
||||||
const int32_t input_x = input_dims->w;
|
|
||||||
const int32_t input_y = input_dims->h;
|
|
||||||
const int32_t input_ch = input_dims->c;
|
|
||||||
const int32_t kernel_x = filter_dims->w;
|
|
||||||
const int32_t kernel_y = filter_dims->h;
|
|
||||||
const int32_t output_x = output_dims->w;
|
|
||||||
const int32_t output_y = output_dims->h;
|
|
||||||
const int32_t output_ch = output_dims->c;
|
|
||||||
|
|
||||||
const int32_t pad_x = conv_params->padding.w;
|
|
||||||
const int32_t pad_y = conv_params->padding.h;
|
|
||||||
const int32_t stride_x = conv_params->stride.w;
|
|
||||||
const int32_t stride_y = conv_params->stride.h;
|
|
||||||
const int32_t dilation_x = conv_params->dilation.w;
|
|
||||||
const int32_t dilation_y = conv_params->dilation.h;
|
|
||||||
|
|
||||||
const int32_t out_activation_min = conv_params->activation.min;
|
|
||||||
const int32_t out_activation_max = conv_params->activation.max;
|
|
||||||
int32_t *output_mult = quant_params->multiplier;
|
|
||||||
int32_t *output_shift = quant_params->shift;
|
|
||||||
|
|
||||||
for (int i_batch = 0; i_batch < input_batches; i_batch++)
|
|
||||||
{
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
for (int32_t i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
|
|
||||||
{
|
|
||||||
const q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[i_out_ch]);
|
|
||||||
|
|
||||||
for (int32_t base_idx_y = -pad_y, i_out_y = 0; i_out_y < output_y; base_idx_y += stride_y, i_out_y++)
|
|
||||||
{
|
|
||||||
for (int32_t base_idx_x = -pad_x, i_out_x = 0; i_out_x < output_x; base_idx_x += stride_x, i_out_x++)
|
|
||||||
{
|
|
||||||
int64_t conv_out_acc = 0;
|
|
||||||
|
|
||||||
const int32_t start_y_max = (-base_idx_y + dilation_y - 1) / dilation_y;
|
|
||||||
const int32_t ker_y_start = MAX(0, start_y_max);
|
|
||||||
const int32_t start_x_max = (-base_idx_x + dilation_x - 1) / dilation_x;
|
|
||||||
const int32_t ker_x_start = MAX(0, start_x_max);
|
|
||||||
const int32_t end_min_y = (input_y - base_idx_y + dilation_y - 1) / dilation_y;
|
|
||||||
const int32_t ker_y_end = MIN(kernel_y, end_min_y);
|
|
||||||
const int32_t end_min_x = (input_x - base_idx_x + dilation_x - 1) / dilation_x;
|
|
||||||
const int32_t ker_x_end = MIN(kernel_x, end_min_x);
|
|
||||||
|
|
||||||
for (int32_t i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (int32_t i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
|
|
||||||
{
|
|
||||||
const int32_t in_row = base_idx_y + dilation_y * i_ker_y;
|
|
||||||
const int32_t in_col = base_idx_x + dilation_x * i_ker_x;
|
|
||||||
|
|
||||||
for (int32_t i_input_ch = 0; i_input_ch < input_ch; i_input_ch++)
|
|
||||||
{
|
|
||||||
conv_out_acc += input_data[(in_row * input_x + in_col) * input_ch + i_input_ch] *
|
|
||||||
filter_data[i_out_ch * input_ch * kernel_y * kernel_x +
|
|
||||||
(i_ker_y * kernel_x + i_ker_x) * input_ch + i_input_ch];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bias_data)
|
|
||||||
{
|
|
||||||
conv_out_acc += bias_data[i_out_ch];
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t conv_out = arm_nn_requantize_s64(conv_out_acc, reduced_multiplier, output_shift[i_out_ch]);
|
|
||||||
conv_out = MAX(conv_out, out_activation_min);
|
|
||||||
conv_out = MIN(conv_out, out_activation_max);
|
|
||||||
output_data[i_out_ch + (i_out_y * output_x + i_out_x) * output_ch] = (int16_t)conv_out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Advance to the next batch */
|
|
||||||
input_data += (input_x * input_y * input_ch);
|
|
||||||
output_data += (output_x * output_y * output_ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Buffer-size query paired with the s16 convolution in this file.
 *
 * Neither argument is inspected; this implementation always reports 0 bytes.
 */
int32_t arm_convolve_s16_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
{
    const int32_t required_bytes = 0;

    /* Parameters are intentionally unused. */
    (void)filter_dims;
    (void)input_dims;

    return required_bytes;
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,335 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_s8.c
|
|
||||||
* Description: s8 version of convolution using symmetric quantization.
|
|
||||||
*
|
|
||||||
* $Date: December 14, 2021
|
|
||||||
* $Revision: V.2.1.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Basic s8 convolution function.
|
|
||||||
*
|
|
||||||
* Refer header file for details. Optimal use case for the DSP/MVE implementation is when input and output channels
|
|
||||||
* are multiples of 4 or at least greater than 4.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_s8(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_conv_params *conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q7_t *input_data,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *filter_data,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int32_t *bias_data,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q7_t *output_data)
|
|
||||||
{
|
|
||||||
(void)bias_dims;
|
|
||||||
|
|
||||||
if (ctx->buf == NULL && arm_convolve_s8_get_buffer_size(input_dims, filter_dims) > 0)
|
|
||||||
{
|
|
||||||
return ARM_MATH_ARGUMENT_ERROR;
|
|
||||||
}
|
|
||||||
q15_t *buffer_a = (q15_t *)ctx->buf;
|
|
||||||
|
|
||||||
const int32_t input_batches = input_dims->n;
|
|
||||||
const uint16_t input_x = input_dims->w;
|
|
||||||
const uint16_t input_y = input_dims->h;
|
|
||||||
const uint16_t input_ch = input_dims->c;
|
|
||||||
const uint16_t kernel_x = filter_dims->w;
|
|
||||||
const uint16_t kernel_y = filter_dims->h;
|
|
||||||
const uint16_t output_x = output_dims->w;
|
|
||||||
const uint16_t output_y = output_dims->h;
|
|
||||||
const uint16_t output_ch = output_dims->c;
|
|
||||||
|
|
||||||
const uint16_t pad_x = conv_params->padding.w;
|
|
||||||
const uint16_t pad_y = conv_params->padding.h;
|
|
||||||
const uint16_t stride_x = conv_params->stride.w;
|
|
||||||
const uint16_t stride_y = conv_params->stride.h;
|
|
||||||
|
|
||||||
const int32_t input_offset = conv_params->input_offset;
|
|
||||||
const int32_t out_offset = conv_params->output_offset;
|
|
||||||
const int32_t out_activation_min = conv_params->activation.min;
|
|
||||||
const int32_t out_activation_max = conv_params->activation.max;
|
|
||||||
int32_t *output_mult = quant_params->multiplier;
|
|
||||||
int32_t *output_shift = quant_params->shift;
|
|
||||||
|
|
||||||
int i_batch;
|
|
||||||
for (i_batch = 0; i_batch < input_batches; i_batch++)
|
|
||||||
{
|
|
||||||
#if defined(ARM_MATH_MVEI)
|
|
||||||
/* Generate upto four columns from the input tensor a GEMM computation */
|
|
||||||
q7_t *im2col_buf = (q7_t *)buffer_a;
|
|
||||||
q7_t *out = output_data;
|
|
||||||
int32_t buffer_fill_cnt = 0;
|
|
||||||
int32_t padded = 0;
|
|
||||||
const int32_t num_elem = kernel_x * kernel_y * input_ch;
|
|
||||||
const int32_t dilation_x = conv_params->dilation.w;
|
|
||||||
const int32_t dilation_y = conv_params->dilation.h;
|
|
||||||
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
|
|
||||||
{
|
|
||||||
const int32_t base_idx_x = stride_x * i_out_x - pad_x;
|
|
||||||
const int32_t base_idx_y = stride_y * i_out_y - pad_y;
|
|
||||||
|
|
||||||
for (int32_t i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (int32_t i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
|
|
||||||
{
|
|
||||||
const int32_t k_y = base_idx_y + dilation_y * i_ker_y;
|
|
||||||
const int32_t k_x = base_idx_x + dilation_x * i_ker_x;
|
|
||||||
|
|
||||||
if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x)
|
|
||||||
{
|
|
||||||
memset(im2col_buf, (int8_t)-input_offset, sizeof(q7_t) * input_ch);
|
|
||||||
padded = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_memcpy_q7(im2col_buf, input_data + (k_y * input_x + k_x) * input_ch, input_ch);
|
|
||||||
}
|
|
||||||
im2col_buf += input_ch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer_fill_cnt++;
|
|
||||||
|
|
||||||
/* Computation is filed for every 4 columns */
|
|
||||||
if (buffer_fill_cnt == 4 && (padded == 0))
|
|
||||||
{
|
|
||||||
buffer_fill_cnt = 0;
|
|
||||||
out = arm_nn_mat_mul_core_4x_s8(num_elem,
|
|
||||||
num_elem,
|
|
||||||
(q7_t *)buffer_a,
|
|
||||||
filter_data,
|
|
||||||
output_ch,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
bias_data,
|
|
||||||
out);
|
|
||||||
im2col_buf = (q7_t *)buffer_a;
|
|
||||||
}
|
|
||||||
else if (buffer_fill_cnt == 4 && (padded != 0))
|
|
||||||
{
|
|
||||||
buffer_fill_cnt = 0;
|
|
||||||
out = arm_nn_mat_mult_s8(filter_data,
|
|
||||||
(q7_t *)buffer_a,
|
|
||||||
output_ch,
|
|
||||||
4,
|
|
||||||
output_shift,
|
|
||||||
output_mult,
|
|
||||||
out_offset,
|
|
||||||
input_offset,
|
|
||||||
0,
|
|
||||||
out_activation_min,
|
|
||||||
out_activation_max,
|
|
||||||
num_elem,
|
|
||||||
bias_data,
|
|
||||||
out);
|
|
||||||
|
|
||||||
im2col_buf = (q7_t *)buffer_a;
|
|
||||||
padded = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Handle left over columns */
|
|
||||||
if (buffer_fill_cnt != 0)
|
|
||||||
{
|
|
||||||
out = arm_nn_mat_mult_s8(filter_data,
|
|
||||||
(q7_t *)buffer_a,
|
|
||||||
output_ch,
|
|
||||||
buffer_fill_cnt,
|
|
||||||
output_shift,
|
|
||||||
output_mult,
|
|
||||||
out_offset,
|
|
||||||
input_offset,
|
|
||||||
0,
|
|
||||||
out_activation_min,
|
|
||||||
out_activation_max,
|
|
||||||
num_elem,
|
|
||||||
bias_data,
|
|
||||||
out);
|
|
||||||
}
|
|
||||||
#else // #if defined(ARM_MATH_MVEI)
|
|
||||||
const uint16_t dilation_x = conv_params->dilation.w;
|
|
||||||
const uint16_t dilation_y = conv_params->dilation.h;
|
|
||||||
|
|
||||||
int32_t i_out_y, i_out_x, i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
/* Generate two columns from the input tensor a GEMM computation */
|
|
||||||
q15_t *two_column_buf = buffer_a;
|
|
||||||
q7_t *out = output_data;
|
|
||||||
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (i_out_y = 0; i_out_y < output_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < output_x; i_out_x++)
|
|
||||||
{
|
|
||||||
const int32_t base_idx_y = stride_y * i_out_y - pad_y;
|
|
||||||
const int32_t base_idx_x = stride_x * i_out_x - pad_x;
|
|
||||||
|
|
||||||
for (i_ker_y = 0; i_ker_y < kernel_y; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
|
|
||||||
{
|
|
||||||
const int32_t k_y = base_idx_y + dilation_y * i_ker_y;
|
|
||||||
const int32_t k_x = base_idx_x + dilation_x * i_ker_x;
|
|
||||||
|
|
||||||
if (k_y < 0 || k_y >= input_y || k_x < 0 || k_x >= input_x)
|
|
||||||
{
|
|
||||||
/* Filling 0 for out-of-bound paddings */
|
|
||||||
memset(two_column_buf, 0, sizeof(q15_t) * input_ch);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Copying the pixel data to column */
|
|
||||||
arm_q7_to_q15_with_offset(
|
|
||||||
input_data + (k_y * input_x + k_x) * input_ch, two_column_buf, input_ch, input_offset);
|
|
||||||
}
|
|
||||||
two_column_buf += input_ch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Computation is filed for every 2 columns */
|
|
||||||
if (two_column_buf == buffer_a + 2 * input_ch * kernel_y * kernel_x)
|
|
||||||
{
|
|
||||||
out = arm_nn_mat_mult_kernel_s8_s16(filter_data,
|
|
||||||
buffer_a,
|
|
||||||
output_ch,
|
|
||||||
output_shift,
|
|
||||||
output_mult,
|
|
||||||
out_offset,
|
|
||||||
out_activation_min,
|
|
||||||
out_activation_max,
|
|
||||||
input_ch * kernel_y * kernel_x,
|
|
||||||
bias_data,
|
|
||||||
out);
|
|
||||||
|
|
||||||
/* counter reset */
|
|
||||||
two_column_buf = buffer_a;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* left-over because odd number of output pixels */
|
|
||||||
if (two_column_buf != buffer_a)
|
|
||||||
{
|
|
||||||
const q7_t *ker_a = filter_data;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for (i = 0; i < output_ch; i++)
|
|
||||||
{
|
|
||||||
/* Load the accumulator with bias first */
|
|
||||||
q31_t sum = 0;
|
|
||||||
if (bias_data)
|
|
||||||
{
|
|
||||||
sum = bias_data[i];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Point to the beginning of the im2col buffer where the input is available as a rearranged column */
|
|
||||||
const q15_t *ip_as_col = buffer_a;
|
|
||||||
|
|
||||||
/* 4 multiply and accumulates are done in one loop. */
|
|
||||||
#if defined(ARM_MATH_DSP)
|
|
||||||
uint16_t col_count = (input_ch * kernel_y * kernel_x) >> 2;
|
|
||||||
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q31_t ker_a1, ker_a2;
|
|
||||||
q31_t ip_b1, ip_b2;
|
|
||||||
|
|
||||||
ker_a = read_and_pad(ker_a, &ker_a1, &ker_a2);
|
|
||||||
|
|
||||||
ip_b1 = arm_nn_read_q15x2_ia(&ip_as_col);
|
|
||||||
sum = __SMLAD(ker_a1, ip_b1, sum);
|
|
||||||
ip_b2 = arm_nn_read_q15x2_ia(&ip_as_col);
|
|
||||||
sum = __SMLAD(ker_a2, ip_b2, sum);
|
|
||||||
|
|
||||||
col_count--;
|
|
||||||
}
|
|
||||||
/* Handle left over mac */
|
|
||||||
col_count = input_ch * kernel_y * kernel_x & 0x3;
|
|
||||||
#else
|
|
||||||
uint16_t col_count = input_ch * kernel_y * kernel_x;
|
|
||||||
#endif
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q7_t ker_a1 = *ker_a++;
|
|
||||||
q15_t ip_b1 = *ip_as_col++;
|
|
||||||
sum += ker_a1 * ip_b1;
|
|
||||||
col_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
sum = arm_nn_requantize(sum, output_mult[i], output_shift[i]);
|
|
||||||
sum += out_offset;
|
|
||||||
sum = MAX(sum, out_activation_min);
|
|
||||||
sum = MIN(sum, out_activation_max);
|
|
||||||
*out++ = (q7_t)sum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif // #if defined(ARM_MATH_MVEI)
|
|
||||||
/* Advance to the next batch */
|
|
||||||
input_data += (input_x * input_y * input_ch);
|
|
||||||
output_data += (output_x * output_y * output_ch);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t arm_convolve_s8_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
|
|
||||||
{
|
|
||||||
#if defined(ARM_MATH_MVEI)
|
|
||||||
int32_t col_length = input_dims->c * filter_dims->w * filter_dims->h;
|
|
||||||
// Get number of complete int16 lanes(multiple of 8) for given col_length. This is dependent on
|
|
||||||
// implementation of arm_nn_mat_mult_s8
|
|
||||||
col_length = (col_length + 7) / 8;
|
|
||||||
// 4 -> number of im2col buffers, 8 -> 8 elements per Q register
|
|
||||||
return 4 * col_length * 8 * (int32_t)sizeof(int8_t);
|
|
||||||
#else
|
|
||||||
return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,130 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2021-2022 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_wrapper_s16.c
|
|
||||||
* Description: s16 convolution layer wrapper function with the main purpose to call the optimal kernel available in
|
|
||||||
* cmsis-nn to perform the convolution.
|
|
||||||
*
|
|
||||||
* $Date: 13 January 2022
|
|
||||||
* $Revision: V.1.2.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Convolution layer
|
|
||||||
*
|
|
||||||
* Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_wrapper_s16(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_conv_params *conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q15_t *input_data,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *filter_data,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int64_t *bias_data,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q15_t *output_data)
|
|
||||||
{
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
if (filter_dims->w * filter_dims->h * input_dims->c < 512 &&
|
|
||||||
(conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
|
|
||||||
{
|
|
||||||
return arm_convolve_fast_s16(ctx,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input_data,
|
|
||||||
filter_dims,
|
|
||||||
filter_data,
|
|
||||||
bias_dims,
|
|
||||||
bias_data,
|
|
||||||
output_dims,
|
|
||||||
output_data);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return arm_convolve_s16(ctx,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input_data,
|
|
||||||
filter_dims,
|
|
||||||
filter_data,
|
|
||||||
bias_dims,
|
|
||||||
bias_data,
|
|
||||||
output_dims,
|
|
||||||
output_data);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
return arm_convolve_s16(ctx,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input_data,
|
|
||||||
filter_dims,
|
|
||||||
filter_data,
|
|
||||||
bias_dims,
|
|
||||||
bias_data,
|
|
||||||
output_dims,
|
|
||||||
output_data);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t arm_convolve_wrapper_s16_get_buffer_size(const cmsis_nn_conv_params *conv_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const cmsis_nn_dims *output_dims)
|
|
||||||
{
|
|
||||||
(void)conv_params;
|
|
||||||
(void)output_dims;
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
if (filter_dims->w * filter_dims->h * input_dims->c < 512 &&
|
|
||||||
(conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
|
|
||||||
{
|
|
||||||
return arm_convolve_fast_s16_get_buffer_size(input_dims, filter_dims);
|
|
||||||
}
|
|
||||||
|
|
||||||
return arm_convolve_s16_get_buffer_size(input_dims, filter_dims);
|
|
||||||
#else
|
|
||||||
return arm_convolve_s16_get_buffer_size(input_dims, filter_dims);
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,133 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_convolve_wrapper_s8.c
|
|
||||||
* Description: s8 convolution layer wrapper function with the main purpose to call the optimal kernel available in
|
|
||||||
* cmsis-nn to perform the convolution.
|
|
||||||
*
|
|
||||||
* $Date: 02. December 2021
|
|
||||||
* $Revision: V.1.1.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Convolution layer
|
|
||||||
*
|
|
||||||
* Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_convolve_wrapper_s8(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_conv_params *conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q7_t *input_data,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *filter_data,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int32_t *bias_data,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q7_t *output_data)
|
|
||||||
{
|
|
||||||
if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (input_dims->c % 4 == 0) &&
|
|
||||||
(conv_params->stride.w == 1) && (conv_params->stride.h == 1) && (filter_dims->w == 1) &&
|
|
||||||
(filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
|
|
||||||
{
|
|
||||||
return arm_convolve_1x1_s8_fast(ctx,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input_data,
|
|
||||||
filter_dims,
|
|
||||||
filter_data,
|
|
||||||
bias_dims,
|
|
||||||
bias_data,
|
|
||||||
output_dims,
|
|
||||||
output_data);
|
|
||||||
}
|
|
||||||
else if ((output_dims->h == 1) && (input_dims->h == 1) && (filter_dims->h == 1) && (output_dims->w % 4 == 0) &&
|
|
||||||
(input_dims->n == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
|
|
||||||
{
|
|
||||||
return arm_convolve_1_x_n_s8(ctx,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input_data,
|
|
||||||
filter_dims,
|
|
||||||
filter_data,
|
|
||||||
bias_dims,
|
|
||||||
bias_data,
|
|
||||||
output_dims,
|
|
||||||
output_data);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return arm_convolve_s8(ctx,
|
|
||||||
conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input_data,
|
|
||||||
filter_dims,
|
|
||||||
filter_data,
|
|
||||||
bias_dims,
|
|
||||||
bias_data,
|
|
||||||
output_dims,
|
|
||||||
output_data);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t arm_convolve_wrapper_s8_get_buffer_size(const cmsis_nn_conv_params *conv_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const cmsis_nn_dims *output_dims)
|
|
||||||
{
|
|
||||||
if ((conv_params->padding.w == 0) && (conv_params->padding.h == 0) && (input_dims->c % 4 == 0) &&
|
|
||||||
(conv_params->stride.w == 1) && (conv_params->stride.h == 1) && (filter_dims->w == 1) &&
|
|
||||||
(filter_dims->h == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
|
|
||||||
{
|
|
||||||
return arm_convolve_1x1_s8_fast_get_buffer_size(input_dims);
|
|
||||||
}
|
|
||||||
else if ((output_dims->h == 1) && (input_dims->h == 1) && (filter_dims->h == 1) && (output_dims->w % 4 == 0) &&
|
|
||||||
(input_dims->n == 1) && (conv_params->dilation.w == 1 && conv_params->dilation.h == 1))
|
|
||||||
{
|
|
||||||
return arm_convolve_1_x_n_s8_get_buffer_size(input_dims, filter_dims);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
return arm_convolve_s8_get_buffer_size(input_dims, filter_dims);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,212 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_depthwise_conv_3x3_s8.c
|
|
||||||
* Description: Optimized s8 depthwise convolution function for channel
|
|
||||||
* multiplier of 1 and 3x3 kernel size.
|
|
||||||
*
|
|
||||||
* $Date: 09. October 2020
|
|
||||||
* $Revision: V.2.0.1
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M CPUs
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Optimized s8 depthwise convolution function with constraint that
|
|
||||||
* in_channel == out_channel and kernel_x == kernel_y == 3 with pads at most 1
|
|
||||||
*
|
|
||||||
* Refer prototype header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_depthwise_conv_3x3_s8(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_dw_conv_params *dw_conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q7_t *input,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *kernel,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int32_t *bias,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q7_t *output)
|
|
||||||
{
|
|
||||||
(void)ctx;
|
|
||||||
(void)bias_dims;
|
|
||||||
|
|
||||||
const int32_t input_x = input_dims->w;
|
|
||||||
const int32_t input_y = input_dims->h;
|
|
||||||
const int32_t input_ch = input_dims->c;
|
|
||||||
const int32_t output_ch = output_dims->c;
|
|
||||||
const int32_t pad_x = dw_conv_params->padding.w;
|
|
||||||
const int32_t pad_y = dw_conv_params->padding.h;
|
|
||||||
const int32_t stride_x = dw_conv_params->stride.w;
|
|
||||||
const int32_t stride_y = dw_conv_params->stride.h;
|
|
||||||
const int32_t *output_shift = quant_params->shift;
|
|
||||||
const int32_t *output_mult = quant_params->multiplier;
|
|
||||||
const int32_t output_x = output_dims->w;
|
|
||||||
const int32_t output_y = output_dims->h;
|
|
||||||
const int32_t output_offset = dw_conv_params->output_offset;
|
|
||||||
const int32_t input_offset = dw_conv_params->input_offset;
|
|
||||||
const int32_t output_activation_min = dw_conv_params->activation.min;
|
|
||||||
const int32_t output_activation_max = dw_conv_params->activation.max;
|
|
||||||
|
|
||||||
/* Check input constraints input_ch == output_ch */
|
|
||||||
if (input_ch != output_ch)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
/* Check input constraints pad_x <= 1 */
|
|
||||||
if (pad_x > 1 || filter_dims->w != 3 || filter_dims->h != 3)
|
|
||||||
{
|
|
||||||
return ARM_MATH_ARGUMENT_ERROR;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int32_t in_h = -pad_y, out_h = 0, out_idx = 0; out_h < output_y; in_h += stride_y, ++out_h)
|
|
||||||
{
|
|
||||||
for (int32_t in_w = -pad_x, out_w = 0, ker_h_start = MAX(0, -in_h); out_w < output_x; in_w += stride_x, ++out_w)
|
|
||||||
{
|
|
||||||
int32_t in_ch = 0;
|
|
||||||
int32_t ker_w_start = MAX(0, -in_w);
|
|
||||||
|
|
||||||
for (; in_ch <= (input_ch - 4); in_ch += 4)
|
|
||||||
{
|
|
||||||
int32_t out_buff0 = bias[in_ch + 0];
|
|
||||||
int32_t out_buff1 = bias[in_ch + 1];
|
|
||||||
int32_t out_buff2 = bias[in_ch + 2];
|
|
||||||
int32_t out_buff3 = bias[in_ch + 3];
|
|
||||||
|
|
||||||
const int8_t *input_ptr = input + (in_h + ker_h_start) * (input_ch * input_x) + in_w * input_ch + in_ch;
|
|
||||||
const int8_t *kernel_ptr = kernel + ker_h_start * (input_ch * 3) + in_ch;
|
|
||||||
|
|
||||||
for (int32_t ker_h = ker_h_start; ker_h < MIN(3, input_y - in_h); ++ker_h)
|
|
||||||
{
|
|
||||||
int32_t in_val = 0;
|
|
||||||
int32_t ker_val = 0;
|
|
||||||
|
|
||||||
if (ker_w_start == 0)
|
|
||||||
{
|
|
||||||
in_val = arm_nn_read_q7x4(input_ptr);
|
|
||||||
ker_val = arm_nn_read_q7x4(kernel_ptr);
|
|
||||||
|
|
||||||
out_buff0 += ((int8_t)in_val + input_offset) * (int8_t)ker_val;
|
|
||||||
out_buff1 += ((int8_t)(in_val >> 8) + input_offset) * (int8_t)(ker_val >> 8);
|
|
||||||
out_buff2 += ((int8_t)(in_val >> 16) + input_offset) * (int8_t)(ker_val >> 16);
|
|
||||||
out_buff3 += ((int8_t)(in_val >> 24) + input_offset) * (int8_t)(ker_val >> 24);
|
|
||||||
}
|
|
||||||
|
|
||||||
in_val = arm_nn_read_q7x4(input_ptr + input_ch);
|
|
||||||
ker_val = arm_nn_read_q7x4(kernel_ptr + input_ch);
|
|
||||||
|
|
||||||
out_buff0 += ((int8_t)in_val + input_offset) * (int8_t)ker_val;
|
|
||||||
out_buff1 += ((int8_t)(in_val >> 8) + input_offset) * (int8_t)(ker_val >> 8);
|
|
||||||
out_buff2 += ((int8_t)(in_val >> 16) + input_offset) * (int8_t)(ker_val >> 16);
|
|
||||||
out_buff3 += ((int8_t)(in_val >> 24) + input_offset) * (int8_t)(ker_val >> 24);
|
|
||||||
|
|
||||||
if ((input_x - in_w) >= 3)
|
|
||||||
{
|
|
||||||
in_val = arm_nn_read_q7x4(input_ptr + (input_ch << 1));
|
|
||||||
ker_val = arm_nn_read_q7x4(kernel_ptr + (input_ch << 1));
|
|
||||||
|
|
||||||
out_buff0 += ((int8_t)in_val + input_offset) * (int8_t)ker_val;
|
|
||||||
out_buff1 += ((int8_t)(in_val >> 8) + input_offset) * (int8_t)(ker_val >> 8);
|
|
||||||
out_buff2 += ((int8_t)(in_val >> 16) + input_offset) * (int8_t)(ker_val >> 16);
|
|
||||||
out_buff3 += ((int8_t)(in_val >> 24) + input_offset) * (int8_t)(ker_val >> 24);
|
|
||||||
}
|
|
||||||
|
|
||||||
input_ptr += (input_ch * input_x);
|
|
||||||
kernel_ptr += (input_ch * 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
out_buff0 = arm_nn_requantize(out_buff0, output_mult[in_ch + 0], output_shift[in_ch + 0]);
|
|
||||||
out_buff1 = arm_nn_requantize(out_buff1, output_mult[in_ch + 1], output_shift[in_ch + 1]);
|
|
||||||
out_buff2 = arm_nn_requantize(out_buff2, output_mult[in_ch + 2], output_shift[in_ch + 2]);
|
|
||||||
out_buff3 = arm_nn_requantize(out_buff3, output_mult[in_ch + 3], output_shift[in_ch + 3]);
|
|
||||||
|
|
||||||
out_buff0 += output_offset;
|
|
||||||
out_buff1 += output_offset;
|
|
||||||
out_buff2 += output_offset;
|
|
||||||
out_buff3 += output_offset;
|
|
||||||
|
|
||||||
out_buff0 = MIN(MAX(out_buff0, output_activation_min), output_activation_max);
|
|
||||||
out_buff1 = MIN(MAX(out_buff1, output_activation_min), output_activation_max);
|
|
||||||
out_buff2 = MIN(MAX(out_buff2, output_activation_min), output_activation_max);
|
|
||||||
out_buff3 = MIN(MAX(out_buff3, output_activation_min), output_activation_max);
|
|
||||||
|
|
||||||
output[out_idx++] = (int8_t)out_buff0;
|
|
||||||
output[out_idx++] = (int8_t)out_buff1;
|
|
||||||
output[out_idx++] = (int8_t)out_buff2;
|
|
||||||
output[out_idx++] = (int8_t)out_buff3;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Leftover
|
|
||||||
for (; in_ch < input_ch; ++in_ch)
|
|
||||||
{
|
|
||||||
int32_t out_buff = bias[in_ch];
|
|
||||||
|
|
||||||
const int8_t *input_ptr = input + (in_h + ker_h_start) * (input_ch * input_x) + in_w * input_ch + in_ch;
|
|
||||||
const int8_t *kernel_ptr = kernel + ker_h_start * (input_ch * 3) + in_ch;
|
|
||||||
|
|
||||||
for (int32_t ker_h = ker_h_start; ker_h < MIN(3, input_y - in_h); ++ker_h)
|
|
||||||
{
|
|
||||||
if (ker_w_start == 0)
|
|
||||||
{
|
|
||||||
out_buff += (*(input_ptr) + input_offset) * *(kernel_ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
out_buff += (*(input_ptr + input_ch) + input_offset) * *(kernel_ptr + input_ch);
|
|
||||||
|
|
||||||
if ((input_x - in_w) >= 3)
|
|
||||||
{
|
|
||||||
out_buff += (*(input_ptr + (input_ch << 1)) + input_offset) * *(kernel_ptr + (input_ch << 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
input_ptr += (input_ch * input_x);
|
|
||||||
kernel_ptr += (input_ch * 3);
|
|
||||||
}
|
|
||||||
|
|
||||||
out_buff = arm_nn_requantize(out_buff, output_mult[in_ch], output_shift[in_ch]);
|
|
||||||
out_buff += output_offset;
|
|
||||||
out_buff = MIN(MAX(out_buff, output_activation_min), output_activation_max);
|
|
||||||
output[out_idx++] = (int8_t)out_buff;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,292 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2022 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_depthwise_conv_s16.c
|
|
||||||
* Description: s16 version of depthwise convolution.
|
|
||||||
*
|
|
||||||
* $Date: 26. Jan 2022
|
|
||||||
* $Revision: V.1.0.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M CPUs
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
 * @brief s16 depthwise convolution kernel for channel multipliers that are a multiple of 4.
 *
 * For every output position and every input channel, the ch_mult output channels
 * derived from that input channel are produced in tiles of four. Each tile reads
 * four consecutive bias / multiplier / shift / kernel entries, so ch_mult has to be
 * a multiple of 4 or the last tile reads past the per-channel arrays.
 *
 * Indexing used by the code:
 *  - input  : (row * input_x + col) * input_ch + in_ch
 *  - kernel : (ker_h * kernel_x + ker_w) * output_ch + out_ch
 *  - output : written sequentially, one value per (out_h, out_w, out_ch)
 * No batch dimension is handled here.
 *
 * @param[in]  input                  s16 input tensor
 * @param[in]  input_x                input width
 * @param[in]  input_y                input height
 * @param[in]  input_ch               number of input channels
 * @param[in]  kernel                 s8 filter weights
 * @param[in]  output_ch              number of output channels
 * @param[in]  ch_mult                channel multiplier (output channels per input channel)
 * @param[in]  kernel_x               filter width
 * @param[in]  kernel_y               filter height
 * @param[in]  pad_x                  padding along the width
 * @param[in]  pad_y                  padding along the height
 * @param[in]  stride_x               stride along the width
 * @param[in]  stride_y               stride along the height
 * @param[in]  bias                   optional 64-bit per-output-channel bias, may be NULL
 * @param[out] output                 s16 output tensor
 * @param[in]  output_shift           per-output-channel requantization shift
 * @param[in]  output_mult            per-output-channel requantization multiplier
 * @param[in]  output_x               output width
 * @param[in]  output_y               output height
 * @param[in]  output_activation_min  lower clamp applied to every result
 * @param[in]  output_activation_max  upper clamp applied to every result
 */
static void __attribute__((unused)) depthwise_conv_s16_mult_4_s16(const int16_t *input,
                                                                  const int32_t input_x,
                                                                  const int32_t input_y,
                                                                  const int32_t input_ch,
                                                                  const int8_t *kernel,
                                                                  const int32_t output_ch,
                                                                  const int32_t ch_mult,
                                                                  const int32_t kernel_x,
                                                                  const int32_t kernel_y,
                                                                  const int32_t pad_x,
                                                                  const int32_t pad_y,
                                                                  const int32_t stride_x,
                                                                  const int32_t stride_y,
                                                                  const int64_t *bias,
                                                                  int16_t *output,
                                                                  const int32_t *output_shift,
                                                                  const int32_t *output_mult,
                                                                  const int32_t output_x,
                                                                  const int32_t output_y,
                                                                  const int32_t output_activation_min,
                                                                  const int32_t output_activation_max)
{
    int32_t out_idx = 0;
    int32_t in_h = -pad_y;

    for (int32_t out_h = 0; out_h < output_y; ++out_h, in_h += stride_y)
    {
        /* Kernel rows that would land outside the input are skipped entirely. */
        const int32_t row_first = MAX(0, -in_h);
        const int32_t row_limit = MIN(kernel_y, input_y - in_h);
        int32_t in_w = -pad_x;

        for (int32_t out_w = 0; out_w < output_x; ++out_w, in_w += stride_x)
        {
            /* Same clipping for the kernel columns. */
            const int32_t col_first = MAX(0, -in_w);
            const int32_t col_limit = MIN(kernel_x, input_x - in_w);
            int32_t in_ch = 0;

            for (int32_t out_ch = 0; out_ch < output_ch; out_ch += ch_mult, ++in_ch)
            {
                for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4)
                {
                    /* First of the four output channels handled by this tile. */
                    const int32_t tile_ch = out_ch + mult_tile;
                    int64_t acc[4] = {0, 0, 0, 0};

                    if (bias)
                    {
                        for (int lane = 0; lane < 4; ++lane)
                        {
                            acc[lane] = bias[tile_ch + lane];
                        }
                    }

                    for (int32_t ker_h = row_first; ker_h < row_limit; ++ker_h)
                    {
                        const int32_t in_row = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch;
                        int32_t ker_idx = ker_h * (output_ch * kernel_x) + col_first * output_ch + out_ch;
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
#pragma clang loop unroll(disable)
#endif
                        for (int32_t ker_w = col_first; ker_w < col_limit; ++ker_w, ker_idx += output_ch)
                        {
                            /* One input sample feeds all four lanes of the tile. */
                            const int32_t in_val = input[in_row + ker_w * input_ch];
                            const int8_t *taps = &kernel[ker_idx + mult_tile];

                            for (int lane = 0; lane < 4; ++lane)
                            {
                                acc[lane] += in_val * taps[lane];
                            }
                        }
                    }

                    /* Requantize the 64-bit sums, clamp, and emit the four results in channel order. */
                    for (int lane = 0; lane < 4; ++lane)
                    {
                        const int32_t reduced_mult = REDUCE_MULTIPLIER(output_mult[tile_ch + lane]);
                        int32_t res = arm_nn_requantize_s64(acc[lane], reduced_mult, output_shift[tile_ch + lane]);

                        res = MAX(res, output_activation_min);
                        res = MIN(res, output_activation_max);
                        output[out_idx++] = (int16_t)res;
                    }
                }
            }
        }
    }
}
|
|
||||||
|
|
||||||
static void depthwise_conv_s16_generic_s16(const int16_t *input,
|
|
||||||
const uint16_t input_batches,
|
|
||||||
const uint16_t input_x,
|
|
||||||
const uint16_t input_y,
|
|
||||||
const uint16_t input_ch,
|
|
||||||
const int8_t *kernel,
|
|
||||||
const uint16_t ch_mult,
|
|
||||||
const uint16_t kernel_x,
|
|
||||||
const uint16_t kernel_y,
|
|
||||||
const uint16_t pad_x,
|
|
||||||
const uint16_t pad_y,
|
|
||||||
const uint16_t stride_x,
|
|
||||||
const uint16_t stride_y,
|
|
||||||
const int64_t *bias,
|
|
||||||
int16_t *output,
|
|
||||||
const int32_t *output_shift,
|
|
||||||
const int32_t *output_mult,
|
|
||||||
const uint16_t output_x,
|
|
||||||
const uint16_t output_y,
|
|
||||||
const int32_t output_activation_min,
|
|
||||||
const int32_t output_activation_max,
|
|
||||||
const uint16_t dilation_x,
|
|
||||||
const uint16_t dilation_y)
|
|
||||||
|
|
||||||
{
|
|
||||||
for (int i_batch = 0; i_batch < input_batches; i_batch++)
|
|
||||||
{
|
|
||||||
for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
|
|
||||||
{
|
|
||||||
const int16_t base_idx_y = (i_out_y * stride_y) - pad_y;
|
|
||||||
for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
|
|
||||||
{
|
|
||||||
const int16_t base_idx_x = (i_out_x * stride_x) - pad_x;
|
|
||||||
for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++)
|
|
||||||
{
|
|
||||||
for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++)
|
|
||||||
{
|
|
||||||
const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult;
|
|
||||||
|
|
||||||
const q31_t reduced_multiplier = REDUCE_MULTIPLIER(output_mult[idx_out_ch]);
|
|
||||||
int64_t acc_0 = 0;
|
|
||||||
|
|
||||||
int ker_y_start;
|
|
||||||
int ker_x_start;
|
|
||||||
int ker_y_end;
|
|
||||||
int ker_x_end;
|
|
||||||
|
|
||||||
if (dilation_x > 1)
|
|
||||||
{
|
|
||||||
const int32_t start_x_max = (-base_idx_x + dilation_x - 1) / dilation_x;
|
|
||||||
ker_x_start = MAX(0, start_x_max);
|
|
||||||
const int32_t end_min_x = (input_x - base_idx_x + dilation_x - 1) / dilation_x;
|
|
||||||
ker_x_end = MIN(kernel_x, end_min_x);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ker_x_start = MAX(0, -base_idx_x);
|
|
||||||
ker_x_end = MIN(kernel_x, input_x - base_idx_x);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (dilation_y > 1)
|
|
||||||
{
|
|
||||||
const int32_t start_y_max = (-base_idx_y + dilation_y - 1) / dilation_y;
|
|
||||||
ker_y_start = MAX(0, start_y_max);
|
|
||||||
const int32_t end_min_y = (input_y - base_idx_y + dilation_y - 1) / dilation_y;
|
|
||||||
ker_y_end = MIN(kernel_y, end_min_y);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
ker_y_start = MAX(0, -base_idx_y);
|
|
||||||
ker_y_end = MIN(kernel_y, input_y - base_idx_y);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bias)
|
|
||||||
{
|
|
||||||
acc_0 = bias[idx_out_ch];
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
|
|
||||||
{
|
|
||||||
const int32_t idx_y = base_idx_y + dilation_y * i_ker_y;
|
|
||||||
for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
|
|
||||||
{
|
|
||||||
const int32_t idx_x = base_idx_x + dilation_x * i_ker_x;
|
|
||||||
int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch;
|
|
||||||
int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch;
|
|
||||||
|
|
||||||
acc_0 += input[idx_0] * kernel[ker_idx_0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Requantize and clamp output to provided range */
|
|
||||||
int32_t result = arm_nn_requantize_s64(acc_0, reduced_multiplier, output_shift[idx_out_ch]);
|
|
||||||
result = MAX(result, output_activation_min);
|
|
||||||
result = MIN(result, output_activation_max);
|
|
||||||
*output++ = (int16_t)result;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
/* Advance to the next batch */
|
|
||||||
input += (input_x * input_y * input_ch);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Basic s16 depthwise convolution function.
|
|
||||||
*
|
|
||||||
* Refer to the header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
arm_status arm_depthwise_conv_s16(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_dw_conv_params *dw_conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q15_t *input,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *kernel,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int64_t *bias,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q15_t *output)
|
|
||||||
{
|
|
||||||
const uint16_t dilation_x = dw_conv_params->dilation.w;
|
|
||||||
const uint16_t dilation_y = dw_conv_params->dilation.h;
|
|
||||||
|
|
||||||
(void)bias_dims;
|
|
||||||
(void)ctx;
|
|
||||||
|
|
||||||
depthwise_conv_s16_generic_s16(input,
|
|
||||||
input_dims->n,
|
|
||||||
input_dims->w,
|
|
||||||
input_dims->h,
|
|
||||||
input_dims->c,
|
|
||||||
kernel,
|
|
||||||
dw_conv_params->ch_mult,
|
|
||||||
filter_dims->w,
|
|
||||||
filter_dims->h,
|
|
||||||
dw_conv_params->padding.w,
|
|
||||||
dw_conv_params->padding.h,
|
|
||||||
dw_conv_params->stride.w,
|
|
||||||
dw_conv_params->stride.h,
|
|
||||||
bias,
|
|
||||||
output,
|
|
||||||
quant_params->shift,
|
|
||||||
quant_params->multiplier,
|
|
||||||
output_dims->w,
|
|
||||||
output_dims->h,
|
|
||||||
dw_conv_params->activation.min,
|
|
||||||
dw_conv_params->activation.max,
|
|
||||||
dilation_x,
|
|
||||||
dilation_y);
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,347 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_depthwise_conv_s8.c
|
|
||||||
* Description: s8 version of depthwise convolution.
|
|
||||||
*
|
|
||||||
* $Date: 30. Dec 2021
|
|
||||||
* $Revision: V.2.7.1
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M CPUs
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
 * @brief s8 depthwise convolution kernel for channel multipliers that are a multiple of 4.
 *
 * For every output position and every input channel, the ch_mult output channels
 * derived from that input channel are produced in tiles of four. Each tile reads
 * four consecutive bias / multiplier / shift / kernel entries, so ch_mult has to be
 * a multiple of 4 or the last tile reads past the per-channel arrays.
 *
 * Indexing used by the code:
 *  - input  : (row * input_x + col) * input_ch + in_ch
 *  - kernel : (ker_h * kernel_x + ker_w) * output_ch + out_ch
 *  - output : written sequentially, four values per tile
 * No batch dimension or dilation is handled here.
 *
 * @param[in]  input                  s8 input tensor
 * @param[in]  input_x                input width
 * @param[in]  input_y                input height
 * @param[in]  input_ch               number of input channels
 * @param[in]  kernel                 s8 filter weights
 * @param[in]  output_ch              number of output channels
 * @param[in]  ch_mult                channel multiplier (output channels per input channel)
 * @param[in]  kernel_x               filter width
 * @param[in]  kernel_y               filter height
 * @param[in]  pad_x                  padding along the width
 * @param[in]  pad_y                  padding along the height
 * @param[in]  stride_x               stride along the width
 * @param[in]  stride_y               stride along the height
 * @param[in]  bias                   optional 32-bit per-output-channel bias, may be NULL
 * @param[out] output                 s8 output tensor
 * @param[in]  output_shift           per-output-channel requantization shift
 * @param[in]  output_mult            per-output-channel requantization multiplier
 * @param[in]  output_x               output width
 * @param[in]  output_y               output height
 * @param[in]  output_offset          value added to every requantized result
 * @param[in]  input_offset           value added to every input sample before multiplying
 * @param[in]  output_activation_min  lower clamp applied to every result
 * @param[in]  output_activation_max  upper clamp applied to every result
 */
static void depthwise_conv_s8_mult_4(const int8_t *input,
                                     const int32_t input_x,
                                     const int32_t input_y,
                                     const int32_t input_ch,
                                     const int8_t *kernel,
                                     const int32_t output_ch,
                                     const int32_t ch_mult,
                                     const int32_t kernel_x,
                                     const int32_t kernel_y,
                                     const int32_t pad_x,
                                     const int32_t pad_y,
                                     const int32_t stride_x,
                                     const int32_t stride_y,
                                     const int32_t *bias,
                                     int8_t *output,
                                     const int32_t *output_shift,
                                     const int32_t *output_mult,
                                     const int32_t output_x,
                                     const int32_t output_y,
                                     const int32_t output_offset,
                                     const int32_t input_offset,
                                     const int32_t output_activation_min,
                                     const int32_t output_activation_max)
{
    int32_t out_idx = 0;
    int32_t in_h = -pad_y;

    for (int32_t out_h = 0; out_h < output_y; ++out_h, in_h += stride_y)
    {
        /* Kernel rows that would land outside the input are skipped entirely. */
        const int32_t row_first = MAX(0, -in_h);
        const int32_t row_limit = MIN(kernel_y, input_y - in_h);
        int32_t in_w = -pad_x;

        for (int32_t out_w = 0; out_w < output_x; ++out_w, in_w += stride_x)
        {
            /* Same clipping for the kernel columns. */
            const int32_t col_first = MAX(0, -in_w);
            const int32_t col_limit = MIN(kernel_x, input_x - in_w);
            int32_t in_ch = 0;

            for (int32_t out_ch = 0; out_ch < output_ch; out_ch += ch_mult, ++in_ch)
            {
                for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4)
                {
                    /* First of the four output channels handled by this tile. */
                    const int32_t tile_ch = out_ch + mult_tile;
                    int32_t acc[4] = {0, 0, 0, 0};

                    if (bias)
                    {
                        for (int lane = 0; lane < 4; ++lane)
                        {
                            acc[lane] = bias[tile_ch + lane];
                        }
                    }

                    for (int32_t ker_h = row_first; ker_h < row_limit; ++ker_h)
                    {
                        const int32_t in_row = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch;
                        int32_t ker_idx = ker_h * (output_ch * kernel_x) + col_first * output_ch + out_ch;
#if defined(__ARMCC_VERSION) && (__ARMCC_VERSION >= 6010050)
#pragma clang loop unroll(disable)
#endif
                        for (int32_t ker_w = col_first; ker_w < col_limit; ++ker_w, ker_idx += output_ch)
                        {
                            /* One offset-corrected input sample feeds all four lanes. */
                            const int32_t in_val = input[in_row + ker_w * input_ch] + input_offset;
                            const int8_t *taps = &kernel[ker_idx + mult_tile];

                            for (int lane = 0; lane < 4; ++lane)
                            {
                                acc[lane] += in_val * taps[lane];
                            }
                        }
                    }
#if defined(ARM_MATH_MVEI)
                    /* Vector path: the four lanes are requantized, clamped and stored together,
                     * advancing the output pointer instead of using out_idx. */
                    (void)out_idx;
                    int32x4_t res = vldrwq_s32(acc);
                    res = arm_requantize_mve_32x4(res,
                                                  vldrwq_s32(&output_mult[tile_ch]),
                                                  vldrwq_s32(&output_shift[tile_ch]));
                    res = vaddq_n_s32(res, output_offset);

                    res = vmaxq_s32(res, vdupq_n_s32(output_activation_min));
                    res = vminq_s32(res, vdupq_n_s32(output_activation_max));
                    vstrbq_s32(output, res);
                    output += 4;
#else
                    /* Scalar path: requantize, offset, clamp and store lane by lane. */
                    for (int lane = 0; lane < 4; ++lane)
                    {
                        int32_t res =
                            arm_nn_requantize(acc[lane], output_mult[tile_ch + lane], output_shift[tile_ch + lane]);

                        res += output_offset;
                        res = MAX(res, output_activation_min);
                        res = MIN(res, output_activation_max);
                        output[out_idx++] = (int8_t)res;
                    }

#endif
                }
            }
        }
    }
}
|
|
||||||
|
|
||||||
/**
 * @brief Generic s8 depthwise convolution supporting batches, any channel multiplier and dilation.
 *
 * Indexing used by the code:
 *  - input  : (y * input_x + x) * input_ch + channel, advanced by one
 *             input_x * input_y * input_ch image per batch
 *  - kernel : (ker_y * kernel_x + ker_x) * (input_ch * ch_mult) + out_channel
 *  - output : written sequentially across all batches
 *
 * Kernel taps that would read outside the input are skipped by clipping the
 * kernel loop bounds, so padded positions contribute nothing to the sum.
 *
 * @param[in]  input                  s8 input tensor
 * @param[in]  input_batches          number of batches
 * @param[in]  input_x                input width
 * @param[in]  input_y                input height
 * @param[in]  input_ch               number of input channels
 * @param[in]  kernel                 s8 filter weights
 * @param[in]  output_ch              number of output channels (unused; derived from input_ch * ch_mult)
 * @param[in]  ch_mult                channel multiplier (output channels per input channel)
 * @param[in]  kernel_x               filter width
 * @param[in]  kernel_y               filter height
 * @param[in]  pad_x                  padding along the width
 * @param[in]  pad_y                  padding along the height
 * @param[in]  stride_x               stride along the width
 * @param[in]  stride_y               stride along the height
 * @param[in]  bias                   optional 32-bit per-output-channel bias, may be NULL
 * @param[out] output                 s8 output tensor
 * @param[in]  output_shift           per-output-channel requantization shift
 * @param[in]  output_mult            per-output-channel requantization multiplier
 * @param[in]  output_x               output width
 * @param[in]  output_y               output height
 * @param[in]  output_offset          value added to every requantized result
 * @param[in]  input_offset           value added to every input sample before multiplying
 * @param[in]  output_activation_min  lower clamp applied to every result
 * @param[in]  output_activation_max  upper clamp applied to every result
 * @param[in]  dilation_x             dilation along the width
 * @param[in]  dilation_y             dilation along the height
 */
static void depthwise_conv_s8_generic(const q7_t *input,
                                      const uint16_t input_batches,
                                      const uint16_t input_x,
                                      const uint16_t input_y,
                                      const uint16_t input_ch,
                                      const q7_t *kernel,
                                      const uint16_t output_ch,
                                      const uint16_t ch_mult,
                                      const uint16_t kernel_x,
                                      const uint16_t kernel_y,
                                      const uint16_t pad_x,
                                      const uint16_t pad_y,
                                      const uint16_t stride_x,
                                      const uint16_t stride_y,
                                      const int32_t *bias,
                                      q7_t *output,
                                      const int32_t *output_shift,
                                      const int32_t *output_mult,
                                      const uint16_t output_x,
                                      const uint16_t output_y,
                                      const int32_t output_offset,
                                      const int32_t input_offset,
                                      const int32_t output_activation_min,
                                      const int32_t output_activation_max,
                                      const uint16_t dilation_x,
                                      const uint16_t dilation_y)

{
    (void)output_ch;
    int i_out = 0;
    int i_batch;

    for (i_batch = 0; i_batch < input_batches; i_batch++)
    {
        for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
        {
            /* Kept in 32 bits: index * stride - pad can exceed the int16_t range
             * for large uint16_t dimensions, and narrowing it would corrupt the
             * kernel bounds computed below. */
            const int32_t base_idx_y = (i_out_y * stride_y) - pad_y;
            for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
            {
                const int32_t base_idx_x = (i_out_x * stride_x) - pad_x;
                for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++)
                {
                    for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++)
                    {
                        const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult;
                        int32_t acc_0 = 0;

                        int ker_y_start;
                        int ker_x_start;
                        int ker_y_end;
                        int ker_x_end;

                        if (dilation_x > 1)
                        {
                            /* Rounded-up divisions: first tap whose input column is >= 0 and
                             * first tap whose input column is >= input_x. */
                            const int32_t start_x_max = (-base_idx_x + dilation_x - 1) / dilation_x;
                            ker_x_start = MAX(0, start_x_max);
                            const int32_t end_min_x = (input_x - base_idx_x + dilation_x - 1) / dilation_x;
                            ker_x_end = MIN(kernel_x, end_min_x);
                        }
                        else
                        {
                            ker_x_start = MAX(0, -base_idx_x);
                            ker_x_end = MIN(kernel_x, input_x - base_idx_x);
                        }

                        if (dilation_y > 1)
                        {
                            /* Same clipping for the rows. */
                            const int32_t start_y_max = (-base_idx_y + dilation_y - 1) / dilation_y;
                            ker_y_start = MAX(0, start_y_max);
                            const int32_t end_min_y = (input_y - base_idx_y + dilation_y - 1) / dilation_y;
                            ker_y_end = MIN(kernel_y, end_min_y);
                        }
                        else
                        {
                            ker_y_start = MAX(0, -base_idx_y);
                            ker_y_end = MIN(kernel_y, input_y - base_idx_y);
                        }

                        if (bias)
                        {
                            acc_0 = bias[idx_out_ch];
                        }

                        for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
                        {
                            const int32_t idx_y = base_idx_y + dilation_y * i_ker_y;
                            for (int i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
                            {
                                const int32_t idx_x = base_idx_x + dilation_x * i_ker_x;
                                int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch;
                                int32_t ker_idx_0 = (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch;

                                acc_0 += (input[idx_0] + input_offset) * kernel[ker_idx_0];
                            }
                        }

                        /* Requantize and clamp output to provided range */
                        acc_0 = arm_nn_requantize(acc_0, output_mult[idx_out_ch], output_shift[idx_out_ch]);
                        acc_0 += output_offset;
                        acc_0 = MAX(acc_0, output_activation_min);
                        acc_0 = MIN(acc_0, output_activation_max);

                        /* Explicit narrowing of the clamped 32-bit value to the s8 output. */
                        output[i_out++] = (q7_t)acc_0;
                    }
                }
            }
        }
        /* Advance to the next batch */
        input += (input_x * input_y * input_ch);
    }
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Basic s8 depthwise convolution function.
|
|
||||||
*
|
|
||||||
* Refer to the header file for details.
|
|
||||||
* Optimization using DSP extension is not available for the generic case where channel multiplier is > 1.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
arm_status arm_depthwise_conv_s8(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_dw_conv_params *dw_conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q7_t *input,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *kernel,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int32_t *bias,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q7_t *output)
|
|
||||||
{
|
|
||||||
const uint16_t dilation_x = dw_conv_params->dilation.w;
|
|
||||||
const uint16_t dilation_y = dw_conv_params->dilation.h;
|
|
||||||
|
|
||||||
(void)dw_conv_params->dilation;
|
|
||||||
(void)bias_dims;
|
|
||||||
(void)ctx;
|
|
||||||
|
|
||||||
if (dw_conv_params->ch_mult % 4 == 0 && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
|
|
||||||
dw_conv_params->dilation.h == 1)
|
|
||||||
{
|
|
||||||
depthwise_conv_s8_mult_4(input,
|
|
||||||
input_dims->w,
|
|
||||||
input_dims->h,
|
|
||||||
input_dims->c,
|
|
||||||
kernel,
|
|
||||||
output_dims->c,
|
|
||||||
dw_conv_params->ch_mult,
|
|
||||||
filter_dims->w,
|
|
||||||
filter_dims->h,
|
|
||||||
dw_conv_params->padding.w,
|
|
||||||
dw_conv_params->padding.h,
|
|
||||||
dw_conv_params->stride.w,
|
|
||||||
dw_conv_params->stride.h,
|
|
||||||
bias,
|
|
||||||
output,
|
|
||||||
quant_params->shift,
|
|
||||||
quant_params->multiplier,
|
|
||||||
output_dims->w,
|
|
||||||
output_dims->h,
|
|
||||||
dw_conv_params->output_offset,
|
|
||||||
dw_conv_params->input_offset,
|
|
||||||
dw_conv_params->activation.min,
|
|
||||||
dw_conv_params->activation.max);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
depthwise_conv_s8_generic(input,
|
|
||||||
input_dims->n,
|
|
||||||
input_dims->w,
|
|
||||||
input_dims->h,
|
|
||||||
input_dims->c,
|
|
||||||
kernel,
|
|
||||||
output_dims->c,
|
|
||||||
dw_conv_params->ch_mult,
|
|
||||||
filter_dims->w,
|
|
||||||
filter_dims->h,
|
|
||||||
dw_conv_params->padding.w,
|
|
||||||
dw_conv_params->padding.h,
|
|
||||||
dw_conv_params->stride.w,
|
|
||||||
dw_conv_params->stride.h,
|
|
||||||
bias,
|
|
||||||
output,
|
|
||||||
quant_params->shift,
|
|
||||||
quant_params->multiplier,
|
|
||||||
output_dims->w,
|
|
||||||
output_dims->h,
|
|
||||||
dw_conv_params->output_offset,
|
|
||||||
dw_conv_params->input_offset,
|
|
||||||
dw_conv_params->activation.min,
|
|
||||||
dw_conv_params->activation.max,
|
|
||||||
dilation_x,
|
|
||||||
dilation_y);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,433 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_depthwise_conv_s8_opt.c
|
|
||||||
* Description: Optimized s8 depthwise separable convolution function for
|
|
||||||
* channel multiplier of 1.
|
|
||||||
*
|
|
||||||
* $Date: January 26, 2021
|
|
||||||
* $Revision: V.2.0.3
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M CPUs
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Optimized s8 depthwise convolution function with constraint that in_channel equals out_channel
|
|
||||||
*
|
|
||||||
* Refer prototype header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_depthwise_conv_s8_opt(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_dw_conv_params *dw_conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q7_t *input,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *kernel,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int32_t *bias,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q7_t *output)
|
|
||||||
{
|
|
||||||
|
|
||||||
const int32_t input_ch = input_dims->c;
|
|
||||||
const int32_t output_ch = output_dims->c;
|
|
||||||
|
|
||||||
/* Check input constraints input_ch == output_ch */
|
|
||||||
if (input_ch != output_ch)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ctx->buf == NULL && arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims) > 0)
|
|
||||||
{
|
|
||||||
return ARM_MATH_ARGUMENT_ERROR;
|
|
||||||
}
|
|
||||||
#ifdef ARM_MATH_DSP
|
|
||||||
const int32_t input_x = input_dims->w;
|
|
||||||
const int32_t input_y = input_dims->h;
|
|
||||||
const int32_t kernel_x = filter_dims->w;
|
|
||||||
const int32_t kernel_y = filter_dims->h;
|
|
||||||
const int32_t pad_x = dw_conv_params->padding.w;
|
|
||||||
const int32_t pad_y = dw_conv_params->padding.h;
|
|
||||||
const int32_t stride_x = dw_conv_params->stride.w;
|
|
||||||
const int32_t stride_y = dw_conv_params->stride.h;
|
|
||||||
const int32_t *output_shift = quant_params->shift;
|
|
||||||
const int32_t *output_mult = quant_params->multiplier;
|
|
||||||
const int32_t output_x = output_dims->w;
|
|
||||||
const int32_t output_y = output_dims->h;
|
|
||||||
const int32_t output_offset = dw_conv_params->output_offset;
|
|
||||||
const int32_t input_offset = dw_conv_params->input_offset;
|
|
||||||
const int32_t output_activation_min = dw_conv_params->activation.min;
|
|
||||||
const int32_t output_activation_max = dw_conv_params->activation.max;
|
|
||||||
q15_t *buffer_a = (q15_t *)ctx->buf;
|
|
||||||
|
|
||||||
#ifdef ARM_MATH_MVEI
|
|
||||||
(void)bias_dims;
|
|
||||||
/* Generate two columns from the input tensor */
|
|
||||||
q7_t *lhs_buffer = (q7_t *)buffer_a;
|
|
||||||
q7_t *out = output;
|
|
||||||
int padded = 0;
|
|
||||||
int buffer_count = 0;
|
|
||||||
const int32_t kernel_size = kernel_x * kernel_y;
|
|
||||||
|
|
||||||
/* This part implements the im2col function */
|
|
||||||
for (int i_out_y = 0, base_idx_y = -pad_y; i_out_y < output_y; base_idx_y += stride_y, i_out_y++)
|
|
||||||
{
|
|
||||||
for (int i_out_x = 0, base_idx_x = -pad_x; i_out_x < output_x; base_idx_x += stride_x, i_out_x++)
|
|
||||||
{
|
|
||||||
for (int i_ker_y = base_idx_y; i_ker_y < base_idx_y + kernel_y; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (int i_ker_x = base_idx_x; i_ker_x < base_idx_x + kernel_x; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= input_y || i_ker_x < 0 || i_ker_x >= input_x)
|
|
||||||
{
|
|
||||||
arm_memset_q7(lhs_buffer, (int8_t)-input_offset, (uint32_t)input_ch);
|
|
||||||
padded = 1;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_memcpy_q7(lhs_buffer, input + (i_ker_y * input_x + i_ker_x) * input_ch, (uint32_t)input_ch);
|
|
||||||
}
|
|
||||||
lhs_buffer += input_ch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
buffer_count++;
|
|
||||||
|
|
||||||
if (buffer_count == 4)
|
|
||||||
{
|
|
||||||
lhs_buffer = (q7_t *)buffer_a;
|
|
||||||
if (padded == 0)
|
|
||||||
{
|
|
||||||
out = arm_nn_depthwise_conv_nt_t_s8(lhs_buffer,
|
|
||||||
kernel,
|
|
||||||
input_offset,
|
|
||||||
input_ch,
|
|
||||||
output_shift,
|
|
||||||
output_mult,
|
|
||||||
output_offset,
|
|
||||||
output_activation_min,
|
|
||||||
output_activation_max,
|
|
||||||
kernel_size,
|
|
||||||
bias,
|
|
||||||
out);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
out = arm_nn_depthwise_conv_nt_t_padded_s8(lhs_buffer,
|
|
||||||
kernel,
|
|
||||||
input_offset,
|
|
||||||
input_ch,
|
|
||||||
output_shift,
|
|
||||||
output_mult,
|
|
||||||
output_offset,
|
|
||||||
output_activation_min,
|
|
||||||
output_activation_max,
|
|
||||||
kernel_size,
|
|
||||||
bias,
|
|
||||||
out);
|
|
||||||
padded = 0;
|
|
||||||
}
|
|
||||||
buffer_count = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Handle left over buffers */
|
|
||||||
lhs_buffer = (q7_t *)buffer_a;
|
|
||||||
|
|
||||||
for (int i_buf = 0; i_buf < buffer_count; i_buf++)
|
|
||||||
{
|
|
||||||
int32_t loop_count = (input_ch + 3) / 4;
|
|
||||||
|
|
||||||
int32_t num_ch_to_process = input_ch;
|
|
||||||
for (int i_loop_cnt = 0, offset = 0; i_loop_cnt < loop_count; num_ch_to_process -= 4, offset += 4, i_loop_cnt++)
|
|
||||||
{
|
|
||||||
const int8_t *col_0 = lhs_buffer + (kernel_size * input_ch * i_buf) + offset;
|
|
||||||
const int8_t *row_0 = kernel + offset;
|
|
||||||
int32x4_t out_0 = vldrwq_s32(&bias[offset]);
|
|
||||||
|
|
||||||
for (int i_ker = 0; i_ker < kernel_size; i_ker++)
|
|
||||||
{
|
|
||||||
const int32x4_t ker_0 = vldrbq_s32(row_0);
|
|
||||||
|
|
||||||
int32x4_t ip_0 = vldrbq_s32(col_0);
|
|
||||||
ip_0 = vaddq_n_s32(ip_0, input_offset);
|
|
||||||
out_0 += vmulq_s32(ip_0, ker_0);
|
|
||||||
|
|
||||||
col_0 += input_ch;
|
|
||||||
row_0 += input_ch;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int32x4_t mult = vldrwq_s32(&output_mult[offset]);
|
|
||||||
const int32x4_t shift = vldrwq_s32(&output_shift[offset]);
|
|
||||||
|
|
||||||
out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
|
|
||||||
out_0 = vaddq_n_s32(out_0, output_offset);
|
|
||||||
out_0 = vmaxq_s32(out_0, vdupq_n_s32(output_activation_min));
|
|
||||||
out_0 = vminq_s32(out_0, vdupq_n_s32(output_activation_max));
|
|
||||||
mve_pred16_t p = vctp32q((uint32_t)num_ch_to_process);
|
|
||||||
vstrbq_p_s32(out, out_0, p);
|
|
||||||
|
|
||||||
out += 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
const int tail_ch = input_ch & 0x3;
|
|
||||||
if (tail_ch != 0)
|
|
||||||
{
|
|
||||||
out -= (4 - tail_ch);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else // ARM_MATH_DSP
|
|
||||||
(void)bias_dims;
|
|
||||||
/* Run the following code in cores using DSP extension */
|
|
||||||
q15_t *const col_buffer_start = buffer_a;
|
|
||||||
q15_t *col_buffer = col_buffer_start;
|
|
||||||
const int32_t *const bias_start_pos = bias;
|
|
||||||
const q31_t *const out_mult_start_pos = output_mult;
|
|
||||||
const q31_t *const out_shift_start_pos = output_shift;
|
|
||||||
uint16_t row_count;
|
|
||||||
uint16_t row_shift;
|
|
||||||
|
|
||||||
for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
|
|
||||||
{
|
|
||||||
const int16_t base_idx_y = (i_out_y * stride_y) - pad_y;
|
|
||||||
for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
|
|
||||||
{
|
|
||||||
const int16_t base_idx_x = (i_out_x * stride_x) - pad_x;
|
|
||||||
|
|
||||||
/* Out of bounds is only considered for the y axis as it provides a more contiguous zero'ing opportunity than
|
|
||||||
along the x axis */
|
|
||||||
const int ker_y_start = MAX(0, -base_idx_y);
|
|
||||||
/* Condition for kernel end dimension: (base_idx_y + ker_y_end) < input_y */
|
|
||||||
const int ker_y_end = MIN(kernel_y, input_y - base_idx_y);
|
|
||||||
|
|
||||||
int32_t index = 0;
|
|
||||||
if (ker_y_start != 0)
|
|
||||||
{
|
|
||||||
memset(&col_buffer[index], 0, (kernel_x * input_ch) * ker_y_start * sizeof(q15_t));
|
|
||||||
index += (kernel_x * input_ch) * ker_y_start;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (int i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
|
|
||||||
{
|
|
||||||
const int32_t idx_y = base_idx_y + i_ker_y;
|
|
||||||
|
|
||||||
for (int i_ker_x = 0; i_ker_x < kernel_x; i_ker_x++)
|
|
||||||
{
|
|
||||||
const int32_t idx_x = base_idx_x + i_ker_x;
|
|
||||||
if (idx_x < 0 || idx_x >= input_x)
|
|
||||||
{
|
|
||||||
memset(&col_buffer[index], 0, input_ch * sizeof(q15_t));
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
arm_q7_to_q15_with_offset((q7_t *)input + (idx_y * input_x + idx_x) * input_ch,
|
|
||||||
&col_buffer[index],
|
|
||||||
input_ch,
|
|
||||||
input_offset);
|
|
||||||
}
|
|
||||||
index += input_ch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const int diff = kernel_y - ker_y_end;
|
|
||||||
if (diff != 0)
|
|
||||||
{
|
|
||||||
memset(&col_buffer[index], 0, (kernel_x * input_ch) * diff * sizeof(q15_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
row_count = output_ch / 4;
|
|
||||||
row_shift = 0;
|
|
||||||
bias = bias_start_pos;
|
|
||||||
output_mult = out_mult_start_pos;
|
|
||||||
output_shift = out_shift_start_pos;
|
|
||||||
|
|
||||||
while (row_count)
|
|
||||||
{
|
|
||||||
q31_t sum = *bias++;
|
|
||||||
q31_t sum_2 = *bias++;
|
|
||||||
q31_t sum_3 = *bias++;
|
|
||||||
q31_t sum_4 = *bias++;
|
|
||||||
|
|
||||||
uint16_t col_count = (kernel_x * kernel_y) / 2;
|
|
||||||
q15_t *col_pos = col_buffer_start + row_shift;
|
|
||||||
const q7_t *row_pos = kernel + row_shift;
|
|
||||||
row_shift += 4;
|
|
||||||
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
/* General idea is to read 4 + 4 (input, kernel) pair and re-arrange them in the right order to
|
|
||||||
use in a SMLAD instruction . One run of this loop produces 4 partial outputs with 8 MACs. */
|
|
||||||
/* Note: variable names can be improved here to align with rows and columns. */
|
|
||||||
q31_t ip_a1, ip_a2, ip_b1, ip_b2, op_a, op_b, op_c;
|
|
||||||
/* Read 4 weights */
|
|
||||||
ip_b1 = arm_nn_read_q7x4(row_pos);
|
|
||||||
ip_a1 = arm_nn_read_q7x4(row_pos + input_ch);
|
|
||||||
op_a = arm_nn_read_q15x2(col_pos);
|
|
||||||
op_b = arm_nn_read_q15x2(col_pos + input_ch);
|
|
||||||
|
|
||||||
ip_a2 = __SXTB16(ip_b1);
|
|
||||||
ip_b1 = __SXTB16(__ROR(ip_b1, 8));
|
|
||||||
|
|
||||||
ip_b2 = __SXTB16(ip_a1);
|
|
||||||
ip_a1 = __SXTB16(__ROR(ip_a1, 8));
|
|
||||||
|
|
||||||
op_c = __PKHBT(op_b, op_a, 16);
|
|
||||||
op_a = __PKHTB(op_b, op_a, 16);
|
|
||||||
op_b = __PKHBT(ip_b2, ip_a2, 16);
|
|
||||||
sum = __SMLAD(op_c, op_b, sum);
|
|
||||||
|
|
||||||
op_b = __PKHBT(ip_b1, ip_a1, 16);
|
|
||||||
sum_2 = __SMLAD(op_a, op_b, sum_2);
|
|
||||||
|
|
||||||
op_a = arm_nn_read_q15x2(col_pos + 2);
|
|
||||||
op_b = arm_nn_read_q15x2(col_pos + input_ch + 2);
|
|
||||||
|
|
||||||
op_c = __PKHBT(op_b, op_a, 16);
|
|
||||||
op_a = __PKHTB(op_b, op_a, 16);
|
|
||||||
op_b = __PKHTB(ip_a2, ip_b2, 16);
|
|
||||||
sum_3 = __SMLAD(op_c, op_b, sum_3);
|
|
||||||
|
|
||||||
op_b = __PKHTB(ip_a1, ip_b1, 16);
|
|
||||||
sum_4 = __SMLAD(op_a, op_b, sum_4);
|
|
||||||
|
|
||||||
row_pos += input_ch << 1;
|
|
||||||
col_pos += input_ch << 1;
|
|
||||||
col_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
col_count = (kernel_x * kernel_y) & 0x1;
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
sum += row_pos[0] * col_pos[0];
|
|
||||||
sum_2 += row_pos[1] * col_pos[1];
|
|
||||||
sum_3 += row_pos[2] * col_pos[2];
|
|
||||||
sum_4 += row_pos[3] * col_pos[3];
|
|
||||||
|
|
||||||
row_pos += input_ch;
|
|
||||||
col_pos += input_ch;
|
|
||||||
|
|
||||||
col_count--;
|
|
||||||
}
|
|
||||||
sum = arm_nn_requantize(sum, *output_mult++, *output_shift++);
|
|
||||||
sum += output_offset;
|
|
||||||
sum = MAX(sum, output_activation_min);
|
|
||||||
sum = MIN(sum, output_activation_max);
|
|
||||||
*output++ = (q7_t)sum;
|
|
||||||
|
|
||||||
sum_2 = arm_nn_requantize(sum_2, *output_mult++, *output_shift++);
|
|
||||||
sum_2 += output_offset;
|
|
||||||
sum_2 = MAX(sum_2, output_activation_min);
|
|
||||||
sum_2 = MIN(sum_2, output_activation_max);
|
|
||||||
*output++ = (q7_t)sum_2;
|
|
||||||
sum_3 = arm_nn_requantize(sum_3, *output_mult++, *output_shift++);
|
|
||||||
sum_3 += output_offset;
|
|
||||||
sum_3 = MAX(sum_3, output_activation_min);
|
|
||||||
sum_3 = MIN(sum_3, output_activation_max);
|
|
||||||
*output++ = (q7_t)sum_3;
|
|
||||||
|
|
||||||
sum_4 = arm_nn_requantize(sum_4, *output_mult++, *output_shift++);
|
|
||||||
sum_4 += output_offset;
|
|
||||||
sum_4 = MAX(sum_4, output_activation_min);
|
|
||||||
sum_4 = MIN(sum_4, output_activation_max);
|
|
||||||
*output++ = (q7_t)sum_4;
|
|
||||||
|
|
||||||
row_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
row_count = output_ch & 0x3;
|
|
||||||
while (row_count)
|
|
||||||
{
|
|
||||||
q15_t *col_pos = col_buffer_start + row_shift;
|
|
||||||
const q7_t *row_pos = kernel + row_shift;
|
|
||||||
q31_t sum = *bias++;
|
|
||||||
const uint16_t col_count = (kernel_x * kernel_y);
|
|
||||||
row_shift += 1;
|
|
||||||
|
|
||||||
for (int i = 0; i < col_count; i++)
|
|
||||||
{
|
|
||||||
sum += row_pos[i * input_ch] * col_pos[i * input_ch];
|
|
||||||
}
|
|
||||||
sum = arm_nn_requantize(sum, *output_mult++, *output_shift++);
|
|
||||||
sum += output_offset;
|
|
||||||
sum = MAX(sum, output_activation_min);
|
|
||||||
sum = MIN(sum, output_activation_max);
|
|
||||||
*output++ = (q7_t)sum;
|
|
||||||
|
|
||||||
row_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
// clear counter and pointers
|
|
||||||
col_buffer = col_buffer_start;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
return arm_depthwise_conv_s8(ctx,
|
|
||||||
dw_conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input,
|
|
||||||
filter_dims,
|
|
||||||
kernel,
|
|
||||||
bias_dims,
|
|
||||||
bias,
|
|
||||||
output_dims,
|
|
||||||
output);
|
|
||||||
#endif /* ARM_MATH_MVEI | ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
 * Scratch-buffer size, in bytes, for arm_depthwise_conv_s8_opt().
 *
 * input_dims  - only the channel count (c) is read.
 * filter_dims - only the kernel width (w) and height (h) are read.
 *
 * The value depends on the build configuration:
 *   ARM_MATH_MVEI : 2 * c * w * h 16-bit elements, plus 4 bytes.
 *   ARM_MATH_DSP  : c * w * h 16-bit elements.
 *   otherwise     : 0 (both arguments are ignored).
 */
int32_t arm_depthwise_conv_s8_opt_get_buffer_size(const cmsis_nn_dims *input_dims, const cmsis_nn_dims *filter_dims)
{
#if defined(ARM_MATH_MVEI)
    /* The + 4 accounts for out of bounds read of the lhs buffers in the *_nt_t_* functions. */
    return (2 * input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t) + 4;
#elif defined(ARM_MATH_DSP)
    /* Cast sizeof so the product stays in int32_t, as in the MVEI branch,
       instead of being computed in size_t and implicitly narrowed on return. */
    return (input_dims->c * filter_dims->w * filter_dims->h) * (int32_t)sizeof(int16_t);
#else
    (void)input_dims;
    (void)filter_dims;
    return 0;
#endif
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,336 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_depthwise_conv_u8_basic_ver1.c
|
|
||||||
* Description: u8 depthwise convolution function
|
|
||||||
*
|
|
||||||
* $Date: 09. October 2020
|
|
||||||
* $Revision: V.1.1.1
|
|
||||||
*
|
|
||||||
* Target : Cortex-M CPUs
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
 * u8 depthwise convolution that produces output channels in groups of four.
 *
 * For every output position and every input channel, the ch_mult output
 * channels derived from that input channel are processed four at a time
 * (mult_tile advances by 4). Kernel taps that would fall outside the input
 * are skipped by clipping the kernel window rather than by reading padding.
 *
 * bias may be NULL, in which case no bias is added. Results are requantized
 * with arm_nn_requantize(), shifted by output_offset, clamped to
 * [output_activation_min, output_activation_max] and stored as uint8_t in
 * the order they are produced.
 */
static void depthwise_conv_u8_mult_4(const uint8_t *input,
                                     const int32_t input_x,
                                     const int32_t input_y,
                                     const int32_t input_ch,
                                     const uint8_t *kernel,
                                     const int32_t output_ch,
                                     const int32_t ch_mult,
                                     const int32_t kernel_x,
                                     const int32_t kernel_y,
                                     const int32_t pad_x,
                                     const int32_t pad_y,
                                     const int32_t stride_x,
                                     const int32_t stride_y,
                                     const int32_t *bias,
                                     uint8_t *output,
                                     const int32_t output_shift,
                                     const int32_t output_mult,
                                     const int32_t output_x,
                                     const int32_t output_y,
                                     const int32_t output_offset,
                                     const int32_t input_offset,
                                     const int32_t filter_offset,
                                     const int32_t output_activation_min,
                                     const int32_t output_activation_max)
{
    int32_t out_idx = 0;
    int32_t in_h = -pad_y;

    for (int32_t out_h = 0; out_h < output_y; ++out_h, in_h += stride_y)
    {
        /* Vertical part of the kernel window that overlaps the input. */
        const int32_t ker_h_start = MAX(0, -in_h);
        const int32_t ker_h_end = MIN(kernel_y, input_y - in_h);
        int32_t in_w = -pad_x;

        for (int32_t out_w = 0; out_w < output_x; ++out_w, in_w += stride_x)
        {
            /* Horizontal part of the kernel window that overlaps the input. */
            const int32_t ker_w_start = MAX(0, -in_w);
            const int32_t ker_w_end = MIN(kernel_x, input_x - in_w);
            int32_t in_ch = 0;

            for (int32_t out_ch = 0; out_ch < output_ch; out_ch += ch_mult, ++in_ch)
            {
                for (int mult_tile = 0; mult_tile < ch_mult; mult_tile += 4)
                {
                    int32_t acc[4] = {0, 0, 0, 0};

                    for (int32_t ker_h = ker_h_start; ker_h < ker_h_end; ++ker_h)
                    {
                        const int32_t in_row = (in_h + ker_h) * (input_ch * input_x) + in_w * input_ch + in_ch;
                        int32_t ker_idx = ker_h * (output_ch * kernel_x) + ker_w_start * output_ch + out_ch;

                        for (int32_t ker_w = ker_w_start; ker_w < ker_w_end; ++ker_w)
                        {
                            /* One input sample feeds all four lanes of the tile. */
                            const int32_t in_val = input[in_row + ker_w * input_ch] + input_offset;

                            for (int lane = 0; lane < 4; ++lane)
                            {
                                acc[lane] += in_val * (kernel[ker_idx + lane + mult_tile] + filter_offset);
                            }
                            ker_idx += output_ch;
                        }
                    }

                    for (int lane = 0; lane < 4; ++lane)
                    {
                        int32_t value = acc[lane];

                        if (bias != NULL)
                        {
                            value += bias[out_ch + lane + mult_tile];
                        }

                        value = arm_nn_requantize(value, output_mult, output_shift);
                        value += output_offset;
                        value = MIN(MAX(value, output_activation_min), output_activation_max);

                        output[out_idx++] = (uint8_t)value;
                    }
                }
            }
        }
    }
}
|
|
||||||
|
|
||||||
/*
 * Generic u8 depthwise convolution for any channel multiplier.
 *
 * One output value is computed per (output y, output x, input channel,
 * channel-multiplier index), in that nesting order, and written to
 * consecutive elements of output. Kernel taps that would fall outside the
 * input are skipped by clipping the kernel window.
 *
 * bias may be NULL, in which case no bias is added. output_ch is accepted
 * for signature parity with depthwise_conv_u8_mult_4() but is not read; the
 * kernel stride is derived from input_ch * ch_mult instead.
 */
static void depthwise_conv_u8_generic(const uint8_t *input,
                                      const int32_t input_x,
                                      const int32_t input_y,
                                      const int32_t input_ch,
                                      const uint8_t *kernel,
                                      const int32_t output_ch,
                                      const int32_t ch_mult,
                                      const int32_t kernel_x,
                                      const int32_t kernel_y,
                                      const int32_t pad_x,
                                      const int32_t pad_y,
                                      const int32_t stride_x,
                                      const int32_t stride_y,
                                      const int32_t *bias,
                                      uint8_t *output,
                                      const int32_t output_shift,
                                      const int32_t output_mult,
                                      const int32_t output_x,
                                      const int32_t output_y,
                                      const int32_t output_offset,
                                      const int32_t input_offset,
                                      const int32_t filter_offset,
                                      const int32_t output_activation_min,
                                      const int32_t output_activation_max)
{
    (void)output_ch;
    int i_out = 0;

    for (int i_out_y = 0; i_out_y < output_y; i_out_y++)
    {
        /* Kept in 32 bits: an output index times a stride need not fit in int16_t. */
        const int32_t base_idx_y = (i_out_y * stride_y) - pad_y;
        /* Condition for kernel start dimension: (base_idx_y + ker_y_start) >= 0 */
        const int32_t ker_y_start = MAX(0, -base_idx_y);
        /* Condition for kernel end dimension: (base_idx_y + ker_y_end) < input_y */
        const int32_t ker_y_end = MIN(kernel_y, input_y - base_idx_y);

        for (int i_out_x = 0; i_out_x < output_x; i_out_x++)
        {
            const int32_t base_idx_x = (i_out_x * stride_x) - pad_x;
            /* Same clipping along x; the bounds do not depend on the channel loops below. */
            const int32_t ker_x_start = MAX(0, -base_idx_x);
            const int32_t ker_x_end = MIN(kernel_x, input_x - base_idx_x);

            for (int i_input_ch = 0; i_input_ch < input_ch; i_input_ch++)
            {
                for (int i_ch_mult = 0; i_ch_mult < ch_mult; i_ch_mult++)
                {
                    const int idx_out_ch = i_ch_mult + i_input_ch * ch_mult;
                    int32_t acc_0 = 0;

                    for (int32_t i_ker_y = ker_y_start; i_ker_y < ker_y_end; i_ker_y++)
                    {
                        const int32_t idx_y = base_idx_y + i_ker_y;

                        for (int32_t i_ker_x = ker_x_start; i_ker_x < ker_x_end; i_ker_x++)
                        {
                            const int32_t idx_x = base_idx_x + i_ker_x;
                            const int32_t idx_0 = (idx_y * input_x + idx_x) * input_ch + i_input_ch;
                            const int32_t ker_idx_0 =
                                (i_ker_y * kernel_x + i_ker_x) * (input_ch * ch_mult) + idx_out_ch;

                            acc_0 += (input[idx_0] + input_offset) * (kernel[ker_idx_0] + filter_offset);
                        }
                    }

                    if (bias != NULL)
                    {
                        acc_0 += bias[idx_out_ch];
                    }

                    /* Requantize and clamp output to provided range */
                    acc_0 = arm_nn_requantize(acc_0, output_mult, output_shift);
                    acc_0 += output_offset;
                    acc_0 = MAX(acc_0, output_activation_min);
                    acc_0 = MIN(acc_0, output_activation_max);

                    /* Explicit narrowing, matching depthwise_conv_u8_mult_4(). */
                    output[i_out++] = (uint8_t)acc_0;
                }
            }
        }
    }
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief uint8 depthwise convolution function with asymmetric quantization
|
|
||||||
*
|
|
||||||
* @param[in] input Pointer to input tensor
|
|
||||||
* @param[in] input_x Width of input tensor
|
|
||||||
* @param[in] input_y Height of input tensor
|
|
||||||
* @param[in] input_ch Channels in input tensor
|
|
||||||
* @param[in] kernel Pointer to kernel weights
|
|
||||||
* @param[in] kernel_x Width of kernel
|
|
||||||
* @param[in] kernel_y Height of kernel
|
|
||||||
* @param[in] ch_mult Number of channel multiplier
|
|
||||||
* @param[in] pad_x Padding sizes x
|
|
||||||
* @param[in] pad_y Padding sizes y
|
|
||||||
* @param[in] stride_x Convolution stride along the width
|
|
||||||
* @param[in] stride_y Convolution stride along the height
|
|
||||||
* @param[in] dilation_x Dilation along width. Not used and intended for future enhancement.
|
|
||||||
* @param[in] dilation_y Dilation along height. Not used and intended for future enhancement.
|
|
||||||
* @param[in] bias Pointer to optional bias values. If no bias is
|
|
||||||
* available, NULL is expected
|
|
||||||
* @param[in] input_offset Input tensor zero offset
|
|
||||||
* @param[in] filter_offset Kernel tensor zero offset
|
|
||||||
* @param[in] output_offset Output tensor zero offset
|
|
||||||
* @param[in,out] output Pointer to output tensor
|
|
||||||
* @param[in] output_x Width of output tensor
|
|
||||||
* @param[in] output_y Height of output tensor
|
|
||||||
* @param[in] output_activation_min Minimum value to clamp the output to. Range : {0, 255}
|
|
||||||
* @param[in] output_activation_max Minimum value to clamp the output to. Range : {0, 255}
|
|
||||||
* @param[in] output_shift Amount of right-shift for output
|
|
||||||
* @param[in] output_mult Output multiplier for requantization
|
|
||||||
* @return The function returns one of the following
|
|
||||||
* <code>ARM_MATH_SIZE_MISMATCH</code> - Not supported dimension of tensors
|
|
||||||
* <code>ARM_MATH_SUCCESS</code> - Successful operation
|
|
||||||
* <code>ARM_MATH_ARGUMENT_ERROR</code> - Implementation not available
|
|
||||||
*
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_depthwise_conv_u8_basic_ver1(const uint8_t *input,
|
|
||||||
const uint16_t input_x,
|
|
||||||
const uint16_t input_y,
|
|
||||||
const uint16_t input_ch,
|
|
||||||
const uint8_t *kernel,
|
|
||||||
const uint16_t kernel_x,
|
|
||||||
const uint16_t kernel_y,
|
|
||||||
const int16_t ch_mult,
|
|
||||||
const int16_t pad_x,
|
|
||||||
const int16_t pad_y,
|
|
||||||
const int16_t stride_x,
|
|
||||||
const int16_t stride_y,
|
|
||||||
const int16_t dilation_x,
|
|
||||||
const int16_t dilation_y,
|
|
||||||
const int32_t *bias,
|
|
||||||
const int32_t input_offset,
|
|
||||||
const int32_t filter_offset,
|
|
||||||
const int32_t output_offset,
|
|
||||||
uint8_t *output,
|
|
||||||
const uint16_t output_x,
|
|
||||||
const uint16_t output_y,
|
|
||||||
const int32_t output_activation_min,
|
|
||||||
const int32_t output_activation_max,
|
|
||||||
const int32_t output_shift,
|
|
||||||
const int32_t output_mult)
|
|
||||||
{
|
|
||||||
(void)dilation_x;
|
|
||||||
(void)dilation_y;
|
|
||||||
|
|
||||||
if (ch_mult % 4 == 0)
|
|
||||||
{
|
|
||||||
depthwise_conv_u8_mult_4(input,
|
|
||||||
input_x,
|
|
||||||
input_y,
|
|
||||||
input_ch,
|
|
||||||
kernel,
|
|
||||||
ch_mult * input_ch,
|
|
||||||
ch_mult,
|
|
||||||
kernel_x,
|
|
||||||
kernel_y,
|
|
||||||
pad_x,
|
|
||||||
pad_y,
|
|
||||||
stride_x,
|
|
||||||
stride_y,
|
|
||||||
bias,
|
|
||||||
output,
|
|
||||||
output_shift,
|
|
||||||
output_mult,
|
|
||||||
output_x,
|
|
||||||
output_y,
|
|
||||||
output_offset,
|
|
||||||
input_offset,
|
|
||||||
filter_offset,
|
|
||||||
output_activation_min,
|
|
||||||
output_activation_max);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
depthwise_conv_u8_generic(input,
|
|
||||||
input_x,
|
|
||||||
input_y,
|
|
||||||
input_ch,
|
|
||||||
kernel,
|
|
||||||
ch_mult * input_ch,
|
|
||||||
ch_mult,
|
|
||||||
kernel_x,
|
|
||||||
kernel_y,
|
|
||||||
pad_x,
|
|
||||||
pad_y,
|
|
||||||
stride_x,
|
|
||||||
stride_y,
|
|
||||||
bias,
|
|
||||||
output,
|
|
||||||
output_shift,
|
|
||||||
output_mult,
|
|
||||||
output_x,
|
|
||||||
output_y,
|
|
||||||
output_offset,
|
|
||||||
input_offset,
|
|
||||||
filter_offset,
|
|
||||||
output_activation_min,
|
|
||||||
output_activation_max);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,135 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_depthwise_conv_wrapper_s8.c
|
|
||||||
* Description: Wrapper API to select appropriate depthwise conv API based
|
|
||||||
* on dimensions.
|
|
||||||
*
|
|
||||||
* $Date: 20. Dec 2021
|
|
||||||
* $Revision: V.1.4.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M CPUs
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
* s8 Depthwise conv wrapper function
|
|
||||||
*
|
|
||||||
* Refer header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
arm_status arm_depthwise_conv_wrapper_s8(const cmsis_nn_context *ctx,
|
|
||||||
const cmsis_nn_dw_conv_params *dw_conv_params,
|
|
||||||
const cmsis_nn_per_channel_quant_params *quant_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const q7_t *input,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const q7_t *filter,
|
|
||||||
const cmsis_nn_dims *bias_dims,
|
|
||||||
const int32_t *bias,
|
|
||||||
const cmsis_nn_dims *output_dims,
|
|
||||||
q7_t *output)
|
|
||||||
{
|
|
||||||
arm_status status = ARM_MATH_SUCCESS;
|
|
||||||
if (1 == dw_conv_params->ch_mult && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
|
|
||||||
dw_conv_params->dilation.h == 1)
|
|
||||||
{
|
|
||||||
#if !defined(ARM_MATH_MVEI)
|
|
||||||
if ((filter_dims->w == 3) && (filter_dims->h == 3) && (dw_conv_params->padding.h <= 1) &&
|
|
||||||
(dw_conv_params->padding.w <= 1))
|
|
||||||
{
|
|
||||||
status = arm_depthwise_conv_3x3_s8(ctx,
|
|
||||||
dw_conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input,
|
|
||||||
filter_dims,
|
|
||||||
filter,
|
|
||||||
bias_dims,
|
|
||||||
bias,
|
|
||||||
output_dims,
|
|
||||||
output);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
#endif
|
|
||||||
{
|
|
||||||
status = arm_depthwise_conv_s8_opt(ctx,
|
|
||||||
dw_conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input,
|
|
||||||
filter_dims,
|
|
||||||
filter,
|
|
||||||
bias_dims,
|
|
||||||
bias,
|
|
||||||
output_dims,
|
|
||||||
output);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
status = arm_depthwise_conv_s8(ctx,
|
|
||||||
dw_conv_params,
|
|
||||||
quant_params,
|
|
||||||
input_dims,
|
|
||||||
input,
|
|
||||||
filter_dims,
|
|
||||||
filter,
|
|
||||||
bias_dims,
|
|
||||||
bias,
|
|
||||||
output_dims,
|
|
||||||
output);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return status;
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t arm_depthwise_conv_wrapper_s8_get_buffer_size(const cmsis_nn_dw_conv_params *dw_conv_params,
|
|
||||||
const cmsis_nn_dims *input_dims,
|
|
||||||
const cmsis_nn_dims *filter_dims,
|
|
||||||
const cmsis_nn_dims *output_dims)
|
|
||||||
{
|
|
||||||
(void)dw_conv_params;
|
|
||||||
int32_t size = 0;
|
|
||||||
|
|
||||||
if (input_dims->c == output_dims->c && input_dims->n == 1 && dw_conv_params->dilation.w == 1 &&
|
|
||||||
dw_conv_params->dilation.h == 1)
|
|
||||||
{
|
|
||||||
size = arm_depthwise_conv_s8_opt_get_buffer_size(input_dims, filter_dims);
|
|
||||||
}
|
|
||||||
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,422 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_depthwise_separable_conv_HWC_q7.c
|
|
||||||
* Description: Q7 depthwise separable convolution function
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Q7 depthwise separable convolution function
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in input tensor dimension
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel filter kernel size
|
|
||||||
* @param[in] padding padding sizes
|
|
||||||
* @param[in] stride convolution stride
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out output tensor dimension
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns either
|
|
||||||
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
|
|
||||||
*
|
|
||||||
* @details
|
|
||||||
*
|
|
||||||
* <b>Buffer size:</b>
|
|
||||||
*
|
|
||||||
* bufferA size: 2*ch_im_in*dim_kernel*dim_kernel
|
|
||||||
*
|
|
||||||
* bufferB size: 0
|
|
||||||
*
|
|
||||||
* <b>Input dimension constraints:</b>
|
|
||||||
*
|
|
||||||
* ch_im_in equals ch_im_out
|
|
||||||
*
|
|
||||||
* Implementation:
|
|
||||||
* There are 3 nested loops here:
|
|
||||||
* Inner loop: calculate each output value with MAC instruction over an accumulator
|
|
||||||
* Mid loop: loop over different output channel
|
|
||||||
* Outer loop: loop over different output (x, y)
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_depthwise_separable_conv_HWC_q7(const q7_t *Im_in,
|
|
||||||
const uint16_t dim_im_in,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q7_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel,
|
|
||||||
const uint16_t padding,
|
|
||||||
const uint16_t stride,
|
|
||||||
const q7_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q7_t *Im_out,
|
|
||||||
const uint16_t dim_im_out,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
(void)bufferB;
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
|
|
||||||
int16_t i_out_y, i_out_x;
|
|
||||||
int16_t i_ker_y, i_ker_x;
|
|
||||||
q7_t *colBuffer = (q7_t *)bufferA;
|
|
||||||
q7_t *pBuffer = colBuffer;
|
|
||||||
const q7_t *pBias = bias;
|
|
||||||
q7_t *pOut = Im_out;
|
|
||||||
uint16_t rowCnt;
|
|
||||||
uint16_t row_shift;
|
|
||||||
|
|
||||||
/* do some checking here, basically ch_im_in == ch_im_out */
|
|
||||||
if (ch_im_in != ch_im_out)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
|
|
||||||
{
|
|
||||||
/* we first do im2col here */
|
|
||||||
for (i_ker_y = i_out_y * stride - padding; i_ker_y < i_out_y * stride - padding + dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride - padding; i_ker_x < i_out_x * stride - padding + dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in || i_ker_x < 0 || i_ker_x >= dim_im_in)
|
|
||||||
{
|
|
||||||
/* arm_fill_q7(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* arm_copy_q7((q7_t *) Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, pBuffer, ch_im_in);
|
|
||||||
*/
|
|
||||||
memcpy(pBuffer, (q7_t *)Im_in + (i_ker_y * dim_im_in + i_ker_x) * ch_im_in, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* we will do the computation here for each channel */
|
|
||||||
rowCnt = ch_im_out >> 2;
|
|
||||||
row_shift = 0;
|
|
||||||
pBias = bias;
|
|
||||||
|
|
||||||
while (rowCnt)
|
|
||||||
{
|
|
||||||
q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
|
|
||||||
uint16_t colCnt = (dim_kernel * dim_kernel) >> 1;
|
|
||||||
q7_t *pB = colBuffer + row_shift;
|
|
||||||
const q7_t *pA = wt + row_shift;
|
|
||||||
row_shift += 4;
|
|
||||||
|
|
||||||
#ifdef USE_INTRINSIC
|
|
||||||
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA1, inA2, inB1, inB2, opA, opB;
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
opB = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
inB2 = __PKHTB(opB, inB1, 16);
|
|
||||||
inB1 = __PKHBT(inB1, opB, 16);
|
|
||||||
inA1 = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
opB = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
inA2 = __PKHTB(opB, inA1, 16);
|
|
||||||
inA1 = __PKHBT(inA1, opB, 16);
|
|
||||||
opA = __SXTB16(inA1);
|
|
||||||
opB = __SXTB16(inB1);
|
|
||||||
sum = __SMLAD(opA, opB, sum);
|
|
||||||
opA = __SXTB16(__ROR(inA1, 8));
|
|
||||||
opB = __SXTB16(__ROR(inB1, 8));
|
|
||||||
sum2 = __SMLAD(opA, opB, sum2);
|
|
||||||
opA = __SXTB16(inA2);
|
|
||||||
opB = __SXTB16(inB2);
|
|
||||||
sum3 = __SMLAD(opA, opB, sum3);
|
|
||||||
opA = __SXTB16(__ROR(inA2, 8));
|
|
||||||
opB = __SXTB16(__ROR(inB2, 8));
|
|
||||||
sum4 = __SMLAD(opA, opB, sum4);
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA1, inA2, inB1, inB2, opA, opB;
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
opB = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
inB2 = __PKHBT(opB, inB1, 16);
|
|
||||||
inB1 = __PKHTB(inB1, opB, 16);
|
|
||||||
inA1 = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
opB = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
inA2 = __PKHBT(opB, inA1, 16);
|
|
||||||
inA1 = __PKHTB(inA1, opB, 16);
|
|
||||||
opA = __SXTB16(inA1);
|
|
||||||
opB = __SXTB16(inB1);
|
|
||||||
sum2 = __SMLAD(opA, opB, sum2);
|
|
||||||
opA = __SXTB16(__ROR(inA1, 8));
|
|
||||||
opB = __SXTB16(__ROR(inB1, 8));
|
|
||||||
sum = __SMLAD(opA, opB, sum);
|
|
||||||
opA = __SXTB16(inA2);
|
|
||||||
opB = __SXTB16(inB2);
|
|
||||||
sum4 = __SMLAD(opA, opB, sum4);
|
|
||||||
opA = __SXTB16(__ROR(inA2, 8));
|
|
||||||
opB = __SXTB16(__ROR(inB2, 8));
|
|
||||||
sum3 = __SMLAD(opA, opB, sum3);
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_BIG_ENDIAN */
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
|
||||||
/*
|
|
||||||
* r0 r1 r2 r3 r4 r5
|
|
||||||
* inA1, inA2, inB1, inB2, opA, opB
|
|
||||||
*/
|
|
||||||
|
|
||||||
asm volatile("COL_LOOP_%=:\n"
|
|
||||||
"ldr.w r2, [%[pB], #0]\n"
|
|
||||||
"add.w %[pB], %[pB], %[ch_im_in]\n"
|
|
||||||
"ldr.w r5, [%[pB], #0]\n"
|
|
||||||
"add.w %[pB], %[pB], %[ch_im_in]\n"
|
|
||||||
"pkhtb r3, r5, r2, ASR #16\n"
|
|
||||||
"pkhbt r2, r2, r5, LSL #16\n"
|
|
||||||
"ldr.w r0, [%[pA], #0]\n"
|
|
||||||
"add.w %[pA], %[pA], %[ch_im_in]\n"
|
|
||||||
"ldr.w r5, [%[pA], #0]\n"
|
|
||||||
"add.w %[pA], %[pA], %[ch_im_in]\n"
|
|
||||||
"pkhtb r1, r5, r0, ASR #16\n"
|
|
||||||
"pkhbt r0, r0, r5, LSL #16\n"
|
|
||||||
"sxtb16 r4, r0\n"
|
|
||||||
"sxtb16 r5, r2\n"
|
|
||||||
"smlad %[sum], r4, r5, %[sum]\n"
|
|
||||||
"mov.w r4, r0, ror #8\n"
|
|
||||||
"mov.w r5, r2, ror #8\n"
|
|
||||||
"sxtb16 r4, r4\n"
|
|
||||||
"sxtb16 r5, r5\n"
|
|
||||||
"smlad %[sum2], r4, r5, %[sum2]\n"
|
|
||||||
"sxtb16 r4, r1\n"
|
|
||||||
"sxtb16 r5, r3\n"
|
|
||||||
"smlad %[sum3], r4, r5, %[sum3]\n"
|
|
||||||
"mov.w r4, r1, ror #8\n"
|
|
||||||
"mov.w r5, r3, ror #8\n"
|
|
||||||
"sxtb16 r4, r4\n"
|
|
||||||
"sxtb16 r5, r5\n"
|
|
||||||
"smlad %[sum4], r4, r5, %[sum4]\n"
|
|
||||||
"subs %[colCnt], #1\n"
|
|
||||||
"bne COL_LOOP_%=\n"
|
|
||||||
: [ sum ] "+r"(sum),
|
|
||||||
[ sum2 ] "+r"(sum2),
|
|
||||||
[ sum3 ] "+r"(sum3),
|
|
||||||
[ sum4 ] "+r"(sum4),
|
|
||||||
[ pB ] "+r"(pB),
|
|
||||||
[ pA ] "+r"(pA)
|
|
||||||
: [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
|
|
||||||
: "r0", "r1", "r2", "r3", "r4", "r5");
|
|
||||||
#else
|
|
||||||
/*
|
|
||||||
* r0 r1 r2 r3 r4 r5
|
|
||||||
* inA1, inA2, inB1, inB2, opA, opB
|
|
||||||
*/
|
|
||||||
asm volatile("COL_LOOP_%=:\n"
|
|
||||||
"ldr.w r2, [%[pB], #0]\n"
|
|
||||||
"add.w %[pB], %[pB], %[ch_im_in]\n"
|
|
||||||
"ldr.w r5, [%[pB], #0]\n"
|
|
||||||
"add.w %[pB], %[pB], %[ch_im_in]\n"
|
|
||||||
"pkhbt r3, r5, r2, LSL #16\n"
|
|
||||||
"pkhtb r2, r2, r5, ASR #16\n"
|
|
||||||
"ldr.w r0, [%[pA], #0]\n"
|
|
||||||
"add.w %[pA], %[pA], %[ch_im_in]\n"
|
|
||||||
"ldr.w r5, [%[pA], #0]\n"
|
|
||||||
"add.w %[pA], %[pA], %[ch_im_in]\n"
|
|
||||||
"pkhbt r1, r5, r0, LSL #16\n"
|
|
||||||
"pkhtb r0, r0, r5, ASR #16\n"
|
|
||||||
"sxtb16 r4, r0\n"
|
|
||||||
"sxtb16 r5, r2\n"
|
|
||||||
"smlad %[sum2], r4, r5, %[sum2]\n"
|
|
||||||
"mov.w r4, r0, ror #8\n"
|
|
||||||
"mov.w r5, r2, ror #8\n"
|
|
||||||
"sxtb16 r4, r4\n"
|
|
||||||
"sxtb16 r5, r5\n"
|
|
||||||
"smlad %[sum], r4, r5, %[sum]\n"
|
|
||||||
"sxtb16 r4, r1\n"
|
|
||||||
"sxtb16 r5, r3\n"
|
|
||||||
"smlad %[sum4], r4, r5, %[sum4]\n"
|
|
||||||
"mov.w r4, r1, ror #8\n"
|
|
||||||
"mov.w r5, r3, ror #8\n"
|
|
||||||
"sxtb16 r4, r4\n"
|
|
||||||
"sxtb16 r5, r5\n"
|
|
||||||
"smlad %[sum3], r4, r5, %[sum3]\n"
|
|
||||||
"subs %[colCnt], #1\n"
|
|
||||||
"bne COL_LOOP_%=\n"
|
|
||||||
: [ sum ] "+r"(sum),
|
|
||||||
[ sum2 ] "+r"(sum2),
|
|
||||||
[ sum3 ] "+r"(sum3),
|
|
||||||
[ sum4 ] "+r"(sum4),
|
|
||||||
[ pB ] "+r"(pB),
|
|
||||||
[ pA ] "+r"(pA)
|
|
||||||
: [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
|
|
||||||
: "r0", "r1", "r2", "r3", "r4", "r5");
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_BIG_ENDIAN */
|
|
||||||
|
|
||||||
#endif /* USE_INTRINSIC */
|
|
||||||
|
|
||||||
colCnt = (dim_kernel * dim_kernel) & 0x1;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
union arm_nnword inA, inB;
|
|
||||||
inA.word = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
inB.word = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
sum += inA.bytes[0] * inB.bytes[0];
|
|
||||||
sum2 += inA.bytes[1] * inB.bytes[1];
|
|
||||||
sum3 += inA.bytes[2] * inB.bytes[2];
|
|
||||||
sum4 += inA.bytes[3] * inB.bytes[3];
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8);
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8);
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8);
|
|
||||||
|
|
||||||
rowCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
rowCnt = ch_im_out & 0x3;
|
|
||||||
while (rowCnt)
|
|
||||||
{
|
|
||||||
q7_t *pB = colBuffer + row_shift;
|
|
||||||
const q7_t *pA = wt + row_shift;
|
|
||||||
q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
uint16_t colCnt = (dim_kernel * dim_kernel);
|
|
||||||
|
|
||||||
row_shift += 1;
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t A1 = *pA;
|
|
||||||
q7_t B1 = *pB;
|
|
||||||
pA += ch_im_in;
|
|
||||||
pB += ch_im_in;
|
|
||||||
sum += A1 * B1;
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
rowCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* clear counter and pointers */
|
|
||||||
pBuffer = colBuffer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
(void)bufferA;
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
int i_out_y, i_out_x, i_ch_out, i_ker_x, i_ker_y;
|
|
||||||
int conv_out;
|
|
||||||
|
|
||||||
/* do some checking here, basically ch_im_in == ch_im_out */
|
|
||||||
if (ch_im_in != ch_im_out)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out; i_out_x++)
|
|
||||||
{
|
|
||||||
for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
|
|
||||||
{
|
|
||||||
// for each output
|
|
||||||
conv_out = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (i_ker_y = 0; i_ker_y < dim_kernel; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = 0; i_ker_x < dim_kernel; i_ker_x++)
|
|
||||||
{
|
|
||||||
int in_row = stride * i_out_y + i_ker_y - padding;
|
|
||||||
int in_col = stride * i_out_x + i_ker_x - padding;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in && in_col < dim_im_in)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in + in_col) * ch_im_in + i_ch_out] *
|
|
||||||
wt[(i_ker_y * dim_kernel + i_ker_x) * ch_im_out + i_ch_out];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[(i_out_y * dim_im_out + i_out_x) * ch_im_out + i_ch_out] =
|
|
||||||
(q7_t)__SSAT((conv_out >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,427 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_depthwise_separable_conv_HWC_q7_nonsquare.c
|
|
||||||
* Description: Q7 depthwise separable convolution function (non-square shape)
|
|
||||||
*
|
|
||||||
* $Date: July 20, 2021
|
|
||||||
* $Revision: V.1.1.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
*
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @ingroup groupNN
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @addtogroup NNConv
|
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Q7 depthwise separable convolution function (non-square shape)
|
|
||||||
* @param[in] Im_in pointer to input tensor
|
|
||||||
* @param[in] dim_im_in_x input tensor dimension x
|
|
||||||
* @param[in] dim_im_in_y input tensor dimension y
|
|
||||||
* @param[in] ch_im_in number of input tensor channels
|
|
||||||
* @param[in] wt pointer to kernel weights
|
|
||||||
* @param[in] ch_im_out number of filters, i.e., output tensor channels
|
|
||||||
* @param[in] dim_kernel_x filter kernel size x
|
|
||||||
* @param[in] dim_kernel_y filter kernel size y
|
|
||||||
* @param[in] padding_x padding sizes x
|
|
||||||
* @param[in] padding_y padding sizes y
|
|
||||||
* @param[in] stride_x convolution stride x
|
|
||||||
* @param[in] stride_y convolution stride y
|
|
||||||
* @param[in] bias pointer to bias
|
|
||||||
* @param[in] bias_shift amount of left-shift for bias
|
|
||||||
* @param[in] out_shift amount of right-shift for output
|
|
||||||
* @param[in,out] Im_out pointer to output tensor
|
|
||||||
* @param[in] dim_im_out_x output tensor dimension x
|
|
||||||
* @param[in] dim_im_out_y output tensor dimension y
|
|
||||||
* @param[in,out] bufferA pointer to buffer space for input
|
|
||||||
* @param[in,out] bufferB pointer to buffer space for output
|
|
||||||
* @return The function returns either
|
|
||||||
* <code>ARM_MATH_SIZE_MISMATCH</code> or <code>ARM_MATH_SUCCESS</code> based on the outcome of size checking.
|
|
||||||
*
|
|
||||||
* This function is the version with full list of optimization tricks, but with
|
|
||||||
* some constraints:
|
|
||||||
* ch_im_in is equal to ch_im_out
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
arm_status arm_depthwise_separable_conv_HWC_q7_nonsquare(const q7_t *Im_in,
|
|
||||||
const uint16_t dim_im_in_x,
|
|
||||||
const uint16_t dim_im_in_y,
|
|
||||||
const uint16_t ch_im_in,
|
|
||||||
const q7_t *wt,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t dim_kernel_x,
|
|
||||||
const uint16_t dim_kernel_y,
|
|
||||||
const uint16_t padding_x,
|
|
||||||
const uint16_t padding_y,
|
|
||||||
const uint16_t stride_x,
|
|
||||||
const uint16_t stride_y,
|
|
||||||
const q7_t *bias,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
q7_t *Im_out,
|
|
||||||
const uint16_t dim_im_out_x,
|
|
||||||
const uint16_t dim_im_out_y,
|
|
||||||
q15_t *bufferA,
|
|
||||||
q7_t *bufferB)
|
|
||||||
{
|
|
||||||
|
|
||||||
(void)bufferB;
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_DSP) && !defined(ARM_MATH_MVEI)
|
|
||||||
/* Run the following code for Cortex-M4 and Cortex-M7 */
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Implementation:
|
|
||||||
* There are 3 nested loop here:
|
|
||||||
* Inner loop: calculate each output value with MAC instruction over an accumulator
|
|
||||||
* Mid loop: loop over different output channel
|
|
||||||
* Outer loop: loop over different output (x, y)
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
int16_t i_out_y, i_out_x;
|
|
||||||
int16_t i_ker_y, i_ker_x;
|
|
||||||
q7_t *colBuffer = (q7_t *)bufferA;
|
|
||||||
q7_t *pBuffer = colBuffer;
|
|
||||||
const q7_t *pBias = bias;
|
|
||||||
q7_t *pOut = Im_out;
|
|
||||||
uint16_t rowCnt;
|
|
||||||
uint16_t row_shift;
|
|
||||||
|
|
||||||
/* do some checking here, basically ch_im_in == ch_im_out */
|
|
||||||
if (ch_im_in != ch_im_out)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
|
|
||||||
{
|
|
||||||
/* we first do im2col here */
|
|
||||||
for (i_ker_y = i_out_y * stride_y - padding_y; i_ker_y < i_out_y * stride_y - padding_y + dim_kernel_y;
|
|
||||||
i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = i_out_x * stride_x - padding_x; i_ker_x < i_out_x * stride_x - padding_x + dim_kernel_x;
|
|
||||||
i_ker_x++)
|
|
||||||
{
|
|
||||||
if (i_ker_y < 0 || i_ker_y >= dim_im_in_y || i_ker_x < 0 || i_ker_x >= dim_im_in_x)
|
|
||||||
{
|
|
||||||
/* arm_fill_q7(0, pBuffer, ch_im_in); */
|
|
||||||
memset(pBuffer, 0, ch_im_in);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* arm_copy_q7((q7_t *) Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, pBuffer,
|
|
||||||
* ch_im_in); */
|
|
||||||
memcpy(pBuffer, (q7_t *)Im_in + (i_ker_y * dim_im_in_x + i_ker_x) * ch_im_in, ch_im_in);
|
|
||||||
}
|
|
||||||
pBuffer += ch_im_in;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* we will do the computation here for each channel */
|
|
||||||
rowCnt = ch_im_out >> 2;
|
|
||||||
row_shift = 0;
|
|
||||||
pBias = bias;
|
|
||||||
|
|
||||||
while (rowCnt)
|
|
||||||
{
|
|
||||||
q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum3 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
|
|
||||||
uint16_t colCnt = (dim_kernel_x * dim_kernel_y) >> 1;
|
|
||||||
q7_t *pB = colBuffer + row_shift;
|
|
||||||
const q7_t *pA = wt + row_shift;
|
|
||||||
row_shift += 4;
|
|
||||||
|
|
||||||
#ifdef USE_INTRINSIC
|
|
||||||
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA1, inA2, inB1, inB2, opA, opB;
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
opB = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
inB2 = __PKHTB(opB, inB1, 16);
|
|
||||||
inB1 = __PKHBT(inB1, opB, 16);
|
|
||||||
inA1 = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
opB = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
inA2 = __PKHTB(opB, inA1, 16);
|
|
||||||
inA1 = __PKHBT(inA1, opB, 16);
|
|
||||||
opA = __SXTB16(inA1);
|
|
||||||
opB = __SXTB16(inB1);
|
|
||||||
sum = __SMLAD(opA, opB, sum);
|
|
||||||
opA = __SXTB16(__ROR(inA1, 8));
|
|
||||||
opB = __SXTB16(__ROR(inB1, 8));
|
|
||||||
sum2 = __SMLAD(opA, opB, sum2);
|
|
||||||
opA = __SXTB16(inA2);
|
|
||||||
opB = __SXTB16(inB2);
|
|
||||||
sum3 = __SMLAD(opA, opB, sum3);
|
|
||||||
opA = __SXTB16(__ROR(inA2, 8));
|
|
||||||
opB = __SXTB16(__ROR(inB2, 8));
|
|
||||||
sum4 = __SMLAD(opA, opB, sum4);
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA1, inA2, inB1, inB2, opA, opB;
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
opB = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
inB2 = __PKHBT(opB, inB1, 16);
|
|
||||||
inB1 = __PKHTB(inB1, opB, 16);
|
|
||||||
inA1 = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
opB = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
inA2 = __PKHBT(opB, inA1, 16);
|
|
||||||
inA1 = __PKHTB(inA1, opB, 16);
|
|
||||||
opA = __SXTB16(inA1);
|
|
||||||
opB = __SXTB16(inB1);
|
|
||||||
sum2 = __SMLAD(opA, opB, sum2);
|
|
||||||
opA = __SXTB16(__ROR(inA1, 8));
|
|
||||||
opB = __SXTB16(__ROR(inB1, 8));
|
|
||||||
sum = __SMLAD(opA, opB, sum);
|
|
||||||
opA = __SXTB16(inA2);
|
|
||||||
opB = __SXTB16(inB2);
|
|
||||||
sum4 = __SMLAD(opA, opB, sum4);
|
|
||||||
opA = __SXTB16(__ROR(inA2, 8));
|
|
||||||
opB = __SXTB16(__ROR(inB2, 8));
|
|
||||||
sum3 = __SMLAD(opA, opB, sum3);
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_BIG_ENDIAN */
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
|
||||||
// r0 r1 r2 r3 r4 r5
|
|
||||||
// inA1, inA2, inB1, inB2, opA, opB
|
|
||||||
asm volatile("COL_LOOP:\n"
|
|
||||||
"ldr.w r2, [%[pB], #0]\n"
|
|
||||||
"add.w %[pB], %[pB], %[ch_im_in]\n"
|
|
||||||
"ldr.w r5, [%[pB], #0]\n"
|
|
||||||
"add.w %[pB], %[pB], %[ch_im_in]\n"
|
|
||||||
"pkhtb r3, r5, r2, ASR #16\n"
|
|
||||||
"pkhbt r2, r2, r5, LSL #16\n"
|
|
||||||
"ldr.w r0, [%[pA], #0]\n"
|
|
||||||
"add.w %[pA], %[pA], %[ch_im_in]\n"
|
|
||||||
"ldr.w r5, [%[pA], #0]\n"
|
|
||||||
"add.w %[pA], %[pA], %[ch_im_in]\n"
|
|
||||||
"pkhtb r1, r5, r0, ASR #16\n"
|
|
||||||
"pkhbt r0, r0, r5, LSL #16\n"
|
|
||||||
"sxtb16 r4, r0\n"
|
|
||||||
"sxtb16 r5, r2\n"
|
|
||||||
"smlad %[sum], r4, r5, %[sum]\n"
|
|
||||||
"mov.w r4, r0, ror #8\n"
|
|
||||||
"mov.w r5, r2, ror #8\n"
|
|
||||||
"sxtb16 r4, r4\n"
|
|
||||||
"sxtb16 r5, r5\n"
|
|
||||||
"smlad %[sum2], r4, r5, %[sum2]\n"
|
|
||||||
"sxtb16 r4, r1\n"
|
|
||||||
"sxtb16 r5, r3\n"
|
|
||||||
"smlad %[sum3], r4, r5, %[sum3]\n"
|
|
||||||
"mov.w r4, r1, ror #8\n"
|
|
||||||
"mov.w r5, r3, ror #8\n"
|
|
||||||
"sxtb16 r4, r4\n"
|
|
||||||
"sxtb16 r5, r5\n"
|
|
||||||
"smlad %[sum4], r4, r5, %[sum4]\n"
|
|
||||||
"subs %[colCnt], #1\n"
|
|
||||||
"bne COL_LOOP\n"
|
|
||||||
: [ sum ] "+r"(sum),
|
|
||||||
[ sum2 ] "+r"(sum2),
|
|
||||||
[ sum3 ] "+r"(sum3),
|
|
||||||
[ sum4 ] "+r"(sum4),
|
|
||||||
[ pB ] "+r"(pB),
|
|
||||||
[ pA ] "+r"(pA)
|
|
||||||
: [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
|
|
||||||
: "r0", "r1", "r2", "r3", "r4", "r5");
|
|
||||||
#else
|
|
||||||
// r0 r1 r2 r3 r4 r5
|
|
||||||
// inA1, inA2, inB1, inB2, opA, opB
|
|
||||||
asm volatile("COL_LOOP:\n"
|
|
||||||
"ldr.w r2, [%[pB], #0]\n"
|
|
||||||
"add.w %[pB], %[pB], %[ch_im_in]\n"
|
|
||||||
"ldr.w r5, [%[pB], #0]\n"
|
|
||||||
"add.w %[pB], %[pB], %[ch_im_in]\n"
|
|
||||||
"pkhbt r3, r5, r2, LSL #16\n"
|
|
||||||
"pkhtb r2, r2, r5, ASR #16\n"
|
|
||||||
"ldr.w r0, [%[pA], #0]\n"
|
|
||||||
"add.w %[pA], %[pA], %[ch_im_in]\n"
|
|
||||||
"ldr.w r5, [%[pA], #0]\n"
|
|
||||||
"add.w %[pA], %[pA], %[ch_im_in]\n"
|
|
||||||
"pkhbt r1, r5, r0, LSL #16\n"
|
|
||||||
"pkhtb r0, r0, r5, ASR #16\n"
|
|
||||||
"sxtb16 r4, r0\n"
|
|
||||||
"sxtb16 r5, r2\n"
|
|
||||||
"smlad %[sum2], r4, r5, %[sum2]\n"
|
|
||||||
"mov.w r4, r0, ror #8\n"
|
|
||||||
"mov.w r5, r2, ror #8\n"
|
|
||||||
"sxtb16 r4, r4\n"
|
|
||||||
"sxtb16 r5, r5\n"
|
|
||||||
"smlad %[sum], r4, r5, %[sum]\n"
|
|
||||||
"sxtb16 r4, r1\n"
|
|
||||||
"sxtb16 r5, r3\n"
|
|
||||||
"smlad %[sum4], r4, r5, %[sum4]\n"
|
|
||||||
"mov.w r4, r1, ror #8\n"
|
|
||||||
"mov.w r5, r3, ror #8\n"
|
|
||||||
"sxtb16 r4, r4\n"
|
|
||||||
"sxtb16 r5, r5\n"
|
|
||||||
"smlad %[sum3], r4, r5, %[sum3]\n"
|
|
||||||
"subs %[colCnt], #1\n"
|
|
||||||
"bne COL_LOOP\n"
|
|
||||||
: [ sum ] "+r"(sum),
|
|
||||||
[ sum2 ] "+r"(sum2),
|
|
||||||
[ sum3 ] "+r"(sum3),
|
|
||||||
[ sum4 ] "+r"(sum4),
|
|
||||||
[ pB ] "+r"(pB),
|
|
||||||
[ pA ] "+r"(pA)
|
|
||||||
: [ colCnt ] "r"(colCnt), [ ch_im_in ] "r"(ch_im_in)
|
|
||||||
: "r0", "r1", "r2", "r3", "r4", "r5");
|
|
||||||
#endif /*ARM_MATH_BIG_ENDIAN */
|
|
||||||
|
|
||||||
#endif /* USE_INTRINSIC */
|
|
||||||
|
|
||||||
colCnt = (dim_kernel_x * dim_kernel_y) & 0x1;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
union arm_nnword inA, inB;
|
|
||||||
inA.word = arm_nn_read_q7x4(pA);
|
|
||||||
pA += ch_im_in;
|
|
||||||
inB.word = arm_nn_read_q7x4(pB);
|
|
||||||
pB += ch_im_in;
|
|
||||||
sum += inA.bytes[0] * inB.bytes[0];
|
|
||||||
sum2 += inA.bytes[1] * inB.bytes[1];
|
|
||||||
sum3 += inA.bytes[2] * inB.bytes[2];
|
|
||||||
sum4 += inA.bytes[3] * inB.bytes[3];
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum2 >> out_shift), 8);
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8);
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum4 >> out_shift), 8);
|
|
||||||
|
|
||||||
rowCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
rowCnt = ch_im_out & 0x3;
|
|
||||||
while (rowCnt)
|
|
||||||
{
|
|
||||||
q7_t *pB = colBuffer + row_shift;
|
|
||||||
const q7_t *pA = wt + row_shift;
|
|
||||||
q31_t sum = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
uint16_t colCnt = (dim_kernel_x * dim_kernel_y);
|
|
||||||
|
|
||||||
row_shift += 1;
|
|
||||||
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t A1 = *pA;
|
|
||||||
q7_t B1 = *pB;
|
|
||||||
pA += ch_im_in;
|
|
||||||
pB += ch_im_in;
|
|
||||||
sum += A1 * B1;
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
rowCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
// clear counter and pointers
|
|
||||||
pBuffer = colBuffer;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
(void)bufferA;
|
|
||||||
|
|
||||||
/* Run the following code as reference implementation for Cortex-M0 and Cortex-M3 */
|
|
||||||
int i_out_y, i_out_x, i_ch_out;
|
|
||||||
int i_ker_y, i_ker_x;
|
|
||||||
|
|
||||||
/* do some checking here, basically ch_im_in == ch_im_out */
|
|
||||||
if (ch_im_in != ch_im_out)
|
|
||||||
{
|
|
||||||
return ARM_MATH_SIZE_MISMATCH;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (i_out_y = 0; i_out_y < dim_im_out_y; i_out_y++)
|
|
||||||
{
|
|
||||||
for (i_out_x = 0; i_out_x < dim_im_out_x; i_out_x++)
|
|
||||||
{
|
|
||||||
for (i_ch_out = 0; i_ch_out < ch_im_out; i_ch_out++)
|
|
||||||
{
|
|
||||||
// for each output
|
|
||||||
int conv_out = ((q31_t)(bias[i_ch_out]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
for (i_ker_y = 0; i_ker_y < dim_kernel_y; i_ker_y++)
|
|
||||||
{
|
|
||||||
for (i_ker_x = 0; i_ker_x < dim_kernel_x; i_ker_x++)
|
|
||||||
{
|
|
||||||
int in_row = stride_y * i_out_y + i_ker_y - padding_y;
|
|
||||||
int in_col = stride_x * i_out_x + i_ker_x - padding_x;
|
|
||||||
if (in_row >= 0 && in_col >= 0 && in_row < dim_im_in_y && in_col < dim_im_in_x)
|
|
||||||
{
|
|
||||||
conv_out += Im_in[(in_row * dim_im_in_x + in_col) * ch_im_in + i_ch_out] *
|
|
||||||
wt[(i_ker_y * dim_kernel_x + i_ker_x) * ch_im_out + i_ch_out];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Im_out[(i_out_y * dim_im_out_x + i_out_x) * ch_im_out + i_ch_out] =
|
|
||||||
(q7_t)__SSAT((conv_out >> out_shift), 8);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
|
|
||||||
/* Return to application */
|
|
||||||
return ARM_MATH_SUCCESS;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of NNConv group
|
|
||||||
*/
|
|
||||||
@ -1,218 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_depthwise_conv_s8_core.c
|
|
||||||
* Description: Depthwise convolution on im2col buffers.
|
|
||||||
*
|
|
||||||
* $Date: 09. October 2020
|
|
||||||
* $Revision: V.1.0.4
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Depthwise conv on an im2col buffer where the input channel equals
|
|
||||||
* output channel.
|
|
||||||
*
|
|
||||||
* Refer to the header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
q7_t *arm_nn_depthwise_conv_s8_core(const q7_t *row,
|
|
||||||
const q15_t *col,
|
|
||||||
const uint16_t num_ch,
|
|
||||||
const int32_t *out_shift,
|
|
||||||
const int32_t *out_mult,
|
|
||||||
const int32_t out_offset,
|
|
||||||
const int32_t activation_min,
|
|
||||||
const int32_t activation_max,
|
|
||||||
const uint16_t kernel_size,
|
|
||||||
const int32_t *const output_bias,
|
|
||||||
q7_t *out)
|
|
||||||
{
|
|
||||||
#if defined(ARM_MATH_MVEI)
|
|
||||||
int32_t ch_per_loop = num_ch / 4;
|
|
||||||
|
|
||||||
const int32_t *bias = output_bias;
|
|
||||||
int8_t *out_tmp = out;
|
|
||||||
|
|
||||||
int32_t idx = 0;
|
|
||||||
|
|
||||||
while (ch_per_loop > 0)
|
|
||||||
{
|
|
||||||
int32x4_t ip_0;
|
|
||||||
int32x4_t ip_1;
|
|
||||||
int32_t ker_loop = kernel_size / 3;
|
|
||||||
int32x4_t out_0 = vldrwq_s32(bias);
|
|
||||||
int32x4_t out_1 = out_0;
|
|
||||||
bias += 4;
|
|
||||||
|
|
||||||
const int32_t offset = idx * 4;
|
|
||||||
const int8_t *row_0 = row + offset;
|
|
||||||
const int16_t *col_0 = col + offset;
|
|
||||||
const int16_t *col_1 = col + kernel_size * num_ch + offset;
|
|
||||||
|
|
||||||
int32x4_t ker_0 = vldrbq_s32(row_0);
|
|
||||||
|
|
||||||
while (ker_loop > 0)
|
|
||||||
{
|
|
||||||
const int8_t *row_1 = row_0 + num_ch;
|
|
||||||
const int8_t *row_2 = row_0 + 2 * num_ch;
|
|
||||||
const int32x4_t ker_1 = vldrbq_s32(row_1);
|
|
||||||
const int32x4_t ker_2 = vldrbq_s32(row_2);
|
|
||||||
|
|
||||||
ip_0 = vldrhq_s32(col_0);
|
|
||||||
ip_1 = vldrhq_s32(col_1);
|
|
||||||
col_0 += num_ch;
|
|
||||||
col_1 += num_ch;
|
|
||||||
|
|
||||||
out_0 += vmulq_s32(ip_0, ker_0);
|
|
||||||
out_1 += vmulq_s32(ip_1, ker_0);
|
|
||||||
|
|
||||||
ip_0 = vldrhq_s32(col_0);
|
|
||||||
ip_1 = vldrhq_s32(col_1);
|
|
||||||
col_0 += num_ch;
|
|
||||||
col_1 += num_ch;
|
|
||||||
|
|
||||||
out_0 += vmulq_s32(ip_0, ker_1);
|
|
||||||
out_1 += vmulq_s32(ip_1, ker_1);
|
|
||||||
|
|
||||||
ip_0 = vldrhq_s32(col_0);
|
|
||||||
ip_1 = vldrhq_s32(col_1);
|
|
||||||
col_0 += num_ch;
|
|
||||||
col_1 += num_ch;
|
|
||||||
|
|
||||||
out_0 += vmulq_s32(ip_0, ker_2);
|
|
||||||
out_1 += vmulq_s32(ip_1, ker_2);
|
|
||||||
row_0 += 3 * num_ch;
|
|
||||||
|
|
||||||
ker_0 = vldrbq_s32(row_0);
|
|
||||||
ker_loop--;
|
|
||||||
}
|
|
||||||
|
|
||||||
idx++;
|
|
||||||
/* Handle tail kernel elements */
|
|
||||||
ker_loop = kernel_size - ((kernel_size / 3) * 3);
|
|
||||||
while (ker_loop > 0)
|
|
||||||
{
|
|
||||||
ip_0 = vldrhq_s32(col_0);
|
|
||||||
ip_1 = vldrhq_s32(col_1);
|
|
||||||
|
|
||||||
out_0 += vmulq_s32(ip_0, ker_0);
|
|
||||||
out_1 += vmulq_s32(ip_1, ker_0);
|
|
||||||
|
|
||||||
col_0 += num_ch;
|
|
||||||
col_1 += num_ch;
|
|
||||||
|
|
||||||
ip_0 = vldrhq_s32(col_0);
|
|
||||||
ip_1 = vldrhq_s32(col_1);
|
|
||||||
|
|
||||||
row_0 += num_ch;
|
|
||||||
ker_0 = vldrbq_s32(row_0);
|
|
||||||
ker_loop--;
|
|
||||||
}
|
|
||||||
const int32x4_t mult = vldrwq_s32(out_mult);
|
|
||||||
const int32x4_t shift = vldrwq_s32(out_shift);
|
|
||||||
out_mult += 4;
|
|
||||||
out_shift += 4;
|
|
||||||
|
|
||||||
out_0 = arm_requantize_mve_32x4(out_0, mult, shift);
|
|
||||||
out_1 = arm_requantize_mve_32x4(out_1, mult, shift);
|
|
||||||
|
|
||||||
out_0 = vaddq_n_s32(out_0, out_offset);
|
|
||||||
out_0 = vmaxq_s32(out_0, vdupq_n_s32(activation_min));
|
|
||||||
out_0 = vminq_s32(out_0, vdupq_n_s32(activation_max));
|
|
||||||
vstrbq_s32(out_tmp, out_0);
|
|
||||||
|
|
||||||
out_1 = vaddq_n_s32(out_1, out_offset);
|
|
||||||
out_1 = vmaxq_s32(out_1, vdupq_n_s32(activation_min));
|
|
||||||
out_1 = vminq_s32(out_1, vdupq_n_s32(activation_max));
|
|
||||||
vstrbq_s32(out_tmp + num_ch, out_1);
|
|
||||||
|
|
||||||
out_tmp += 4;
|
|
||||||
ch_per_loop--;
|
|
||||||
}
|
|
||||||
|
|
||||||
int32_t tail_ch = num_ch & 3;
|
|
||||||
if (tail_ch != 0)
|
|
||||||
{
|
|
||||||
int32_t ch_idx = (num_ch & ~3);
|
|
||||||
int32x4_t col_0_sum;
|
|
||||||
int32x4_t col_1_sum;
|
|
||||||
|
|
||||||
const int32_t single_buffer_size = kernel_size * num_ch;
|
|
||||||
for (int i = 0; i < tail_ch; i++)
|
|
||||||
{
|
|
||||||
const int16_t *col_pos_0 = col + ch_idx;
|
|
||||||
const int16_t *col_pos_1 = col_pos_0 + single_buffer_size;
|
|
||||||
|
|
||||||
const int8_t *row_pos = row + ch_idx;
|
|
||||||
int32_t sum_0 = bias[i];
|
|
||||||
int32_t sum_1 = bias[i];
|
|
||||||
|
|
||||||
for (int j = 0; j < kernel_size; j++)
|
|
||||||
{
|
|
||||||
const int8_t row_val = row_pos[j * num_ch];
|
|
||||||
sum_0 += row_val * col_pos_0[j * num_ch];
|
|
||||||
sum_1 += row_val * col_pos_1[j * num_ch];
|
|
||||||
}
|
|
||||||
col_0_sum[i] = sum_0;
|
|
||||||
col_1_sum[i] = sum_1;
|
|
||||||
|
|
||||||
ch_idx++;
|
|
||||||
}
|
|
||||||
const mve_pred16_t p = vctp32q((uint32_t)tail_ch);
|
|
||||||
const int32x4_t mult = vldrwq_z_s32(out_mult, p);
|
|
||||||
const int32x4_t shift = vldrwq_z_s32(out_shift, p);
|
|
||||||
|
|
||||||
col_0_sum = arm_requantize_mve_32x4(col_0_sum, mult, shift);
|
|
||||||
col_1_sum = arm_requantize_mve_32x4(col_1_sum, mult, shift);
|
|
||||||
|
|
||||||
col_0_sum = vaddq_n_s32(col_0_sum, out_offset);
|
|
||||||
col_0_sum = vmaxq_s32(col_0_sum, vdupq_n_s32(activation_min));
|
|
||||||
col_0_sum = vminq_s32(col_0_sum, vdupq_n_s32(activation_max));
|
|
||||||
vstrbq_p_s32(out_tmp, col_0_sum, p);
|
|
||||||
|
|
||||||
col_1_sum = vaddq_n_s32(col_1_sum, out_offset);
|
|
||||||
col_1_sum = vmaxq_s32(col_1_sum, vdupq_n_s32(activation_min));
|
|
||||||
col_1_sum = vminq_s32(col_1_sum, vdupq_n_s32(activation_max));
|
|
||||||
vstrbq_p_s32(out_tmp + num_ch, col_1_sum, p);
|
|
||||||
|
|
||||||
out_tmp += tail_ch;
|
|
||||||
}
|
|
||||||
|
|
||||||
return out_tmp + num_ch;
|
|
||||||
#else
|
|
||||||
(void)row;
|
|
||||||
(void)col;
|
|
||||||
(void)num_ch;
|
|
||||||
(void)out_shift;
|
|
||||||
(void)out_mult;
|
|
||||||
(void)out_offset;
|
|
||||||
(void)activation_min;
|
|
||||||
(void)activation_max;
|
|
||||||
(void)kernel_size;
|
|
||||||
(void)output_bias;
|
|
||||||
(void)out;
|
|
||||||
return NULL;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
@ -1,186 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_mat_mult_kernel_q7_q15.c
|
|
||||||
* Description: Matrix-multiplication function for convolution
|
|
||||||
*
|
|
||||||
* $Date: January 26, 2021
|
|
||||||
* $Revision: V.1.0.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Matrix-multiplication function for convolution.
|
|
||||||
*
|
|
||||||
* @details Refer to header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
q7_t *arm_nn_mat_mult_kernel_q7_q15(const q7_t *pA,
|
|
||||||
const q15_t *pInBuffer,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t numCol_A,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
const q7_t *bias,
|
|
||||||
q7_t *pOut)
|
|
||||||
{
|
|
||||||
#if defined(ARM_MATH_DSP)
|
|
||||||
/* set up the second output pointers */
|
|
||||||
q7_t *pOut2 = pOut + ch_im_out;
|
|
||||||
const q7_t *pBias = bias;
|
|
||||||
|
|
||||||
uint16_t rowCnt = ch_im_out >> 1;
|
|
||||||
/* this loop over rows in A */
|
|
||||||
while (rowCnt)
|
|
||||||
{
|
|
||||||
/* setup pointers for B */
|
|
||||||
const q15_t *pB = pInBuffer;
|
|
||||||
const q15_t *pB2 = pB + numCol_A;
|
|
||||||
|
|
||||||
/* align the second pointer for A */
|
|
||||||
const q7_t *pA2 = pA + numCol_A;
|
|
||||||
|
|
||||||
/* init the sum with bias */
|
|
||||||
q31_t sum = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum3 = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum4 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
|
|
||||||
uint16_t colCnt = numCol_A >> 2;
|
|
||||||
/* accumulate over the vector */
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA11, inA12, inA21, inA22;
|
|
||||||
|
|
||||||
q31_t inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
q31_t inB2 = arm_nn_read_q15x2_ia(&pB2);
|
|
||||||
|
|
||||||
pA = read_and_pad(pA, &inA11, &inA12);
|
|
||||||
pA2 = read_and_pad(pA2, &inA21, &inA22);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA11, inB1, sum);
|
|
||||||
sum2 = __SMLAD(inA11, inB2, sum2);
|
|
||||||
sum3 = __SMLAD(inA21, inB1, sum3);
|
|
||||||
sum4 = __SMLAD(inA21, inB2, sum4);
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
inB2 = arm_nn_read_q15x2_ia(&pB2);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA12, inB1, sum);
|
|
||||||
sum2 = __SMLAD(inA12, inB2, sum2);
|
|
||||||
sum3 = __SMLAD(inA22, inB1, sum3);
|
|
||||||
sum4 = __SMLAD(inA22, inB2, sum4);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
} /* while over colCnt */
|
|
||||||
colCnt = numCol_A & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
q7_t inA2 = *pA2++;
|
|
||||||
q15_t inB2 = *pB2++;
|
|
||||||
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
sum2 += inA1 * inB2;
|
|
||||||
sum3 += inA2 * inB1;
|
|
||||||
sum4 += inA2 * inB2;
|
|
||||||
colCnt--;
|
|
||||||
} /* while over colCnt */
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8);
|
|
||||||
*pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8);
|
|
||||||
*pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8);
|
|
||||||
|
|
||||||
/* skip the row computed with A2 */
|
|
||||||
pA += numCol_A;
|
|
||||||
rowCnt--;
|
|
||||||
} /* for over ch_im_out */
|
|
||||||
|
|
||||||
/* compute left-over row if any */
|
|
||||||
if (ch_im_out & 0x1)
|
|
||||||
{
|
|
||||||
/* setup pointers for B */
|
|
||||||
const q15_t *pB = pInBuffer;
|
|
||||||
const q15_t *pB2 = pB + numCol_A;
|
|
||||||
|
|
||||||
/* load the bias */
|
|
||||||
q31_t sum = ((q31_t)(*pBias) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum2 = ((q31_t)(*pBias++) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
|
|
||||||
uint16_t colCnt = numCol_A >> 2;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA11, inA12;
|
|
||||||
|
|
||||||
q31_t inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
q31_t inB2 = arm_nn_read_q15x2_ia(&pB2);
|
|
||||||
|
|
||||||
pA = read_and_pad(pA, &inA11, &inA12);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA11, inB1, sum);
|
|
||||||
sum2 = __SMLAD(inA11, inB2, sum2);
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
inB2 = arm_nn_read_q15x2_ia(&pB2);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA12, inB1, sum);
|
|
||||||
sum2 = __SMLAD(inA12, inB2, sum2);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
colCnt = numCol_A & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
q15_t inB2 = *pB2++;
|
|
||||||
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
sum2 += inA1 * inB2;
|
|
||||||
colCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
*pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8);
|
|
||||||
}
|
|
||||||
|
|
||||||
pOut += ch_im_out;
|
|
||||||
|
|
||||||
/* return the new output pointer with offset */
|
|
||||||
return pOut;
|
|
||||||
#else
|
|
||||||
(void)pA;
|
|
||||||
(void)pInBuffer;
|
|
||||||
(void)ch_im_out;
|
|
||||||
(void)numCol_A;
|
|
||||||
(void)bias_shift;
|
|
||||||
(void)out_shift;
|
|
||||||
(void)bias;
|
|
||||||
(void)pOut;
|
|
||||||
/* To be completed */
|
|
||||||
return NULL;
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
}
|
|
||||||
@ -1,137 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_mat_mult_kernel_q7_q15_reordered.c
|
|
||||||
* Description: Matrix-multiplication function for convolution with reordered columns
|
|
||||||
*
|
|
||||||
* $Date: January 26, 2021
|
|
||||||
* $Revision: V.1.0.2
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @brief Matrix-multiplication function for convolution with re-ordered input.
|
|
||||||
*
|
|
||||||
* @details Refer to header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
q7_t *arm_nn_mat_mult_kernel_q7_q15_reordered(const q7_t *pA,
|
|
||||||
const q15_t *pInBuffer,
|
|
||||||
const uint16_t ch_im_out,
|
|
||||||
const uint16_t numCol_A,
|
|
||||||
const uint16_t bias_shift,
|
|
||||||
const uint16_t out_shift,
|
|
||||||
const q7_t *bias,
|
|
||||||
q7_t *pOut)
|
|
||||||
{
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_DSP)
|
|
||||||
/* set up the second output pointers */
|
|
||||||
q7_t *pOut2 = pOut + ch_im_out;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
/* this loop over rows in A */
|
|
||||||
for (i = 0; i < ch_im_out; i += 2)
|
|
||||||
{
|
|
||||||
/* setup pointers for B */
|
|
||||||
const q15_t *pB = pInBuffer;
|
|
||||||
const q15_t *pB2 = pB + numCol_A;
|
|
||||||
|
|
||||||
/* align the second pointer for A */
|
|
||||||
const q7_t *pA2 = pA + numCol_A;
|
|
||||||
|
|
||||||
/* init the sum with bias */
|
|
||||||
q31_t sum = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum2 = ((q31_t)(bias[i]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum3 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
q31_t sum4 = ((q31_t)(bias[i + 1]) << bias_shift) + NN_ROUND(out_shift);
|
|
||||||
|
|
||||||
uint16_t colCnt = numCol_A >> 2;
|
|
||||||
/* accumulate over the vector */
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q31_t inA11, inA12, inA21, inA22;
|
|
||||||
|
|
||||||
q31_t inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
q31_t inB2 = arm_nn_read_q15x2_ia(&pB2);
|
|
||||||
|
|
||||||
pA = read_and_pad_reordered(pA, &inA11, &inA12);
|
|
||||||
pA2 = read_and_pad_reordered(pA2, &inA21, &inA22);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA11, inB1, sum);
|
|
||||||
sum2 = __SMLAD(inA11, inB2, sum2);
|
|
||||||
sum3 = __SMLAD(inA21, inB1, sum3);
|
|
||||||
sum4 = __SMLAD(inA21, inB2, sum4);
|
|
||||||
|
|
||||||
inB1 = arm_nn_read_q15x2_ia(&pB);
|
|
||||||
inB2 = arm_nn_read_q15x2_ia(&pB2);
|
|
||||||
|
|
||||||
sum = __SMLAD(inA12, inB1, sum);
|
|
||||||
sum2 = __SMLAD(inA12, inB2, sum2);
|
|
||||||
sum3 = __SMLAD(inA22, inB1, sum3);
|
|
||||||
sum4 = __SMLAD(inA22, inB2, sum4);
|
|
||||||
|
|
||||||
colCnt--;
|
|
||||||
} /* while over colCnt */
|
|
||||||
colCnt = numCol_A & 0x3;
|
|
||||||
while (colCnt)
|
|
||||||
{
|
|
||||||
q7_t inA1 = *pA++;
|
|
||||||
q15_t inB1 = *pB++;
|
|
||||||
q7_t inA2 = *pA2++;
|
|
||||||
q15_t inB2 = *pB2++;
|
|
||||||
|
|
||||||
sum += inA1 * inB1;
|
|
||||||
sum2 += inA1 * inB2;
|
|
||||||
sum3 += inA2 * inB1;
|
|
||||||
sum4 += inA2 * inB2;
|
|
||||||
colCnt--;
|
|
||||||
} /* while over colCnt */
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum >> out_shift), 8);
|
|
||||||
*pOut++ = (q7_t)__SSAT((sum3 >> out_shift), 8);
|
|
||||||
*pOut2++ = (q7_t)__SSAT((sum2 >> out_shift), 8);
|
|
||||||
*pOut2++ = (q7_t)__SSAT((sum4 >> out_shift), 8);
|
|
||||||
|
|
||||||
/* skip the row computed with A2 */
|
|
||||||
pA += numCol_A;
|
|
||||||
} /* for over ch_im_out */
|
|
||||||
|
|
||||||
pOut += ch_im_out;
|
|
||||||
|
|
||||||
/* return the new output pointer with offset */
|
|
||||||
return pOut;
|
|
||||||
#else
|
|
||||||
(void)pA;
|
|
||||||
(void)pInBuffer;
|
|
||||||
(void)ch_im_out;
|
|
||||||
(void)numCol_A;
|
|
||||||
(void)bias_shift;
|
|
||||||
(void)out_shift;
|
|
||||||
(void)bias;
|
|
||||||
(void)pOut;
|
|
||||||
/* To be completed */
|
|
||||||
return NULL;
|
|
||||||
#endif /* ARM_MATH_DSP */
|
|
||||||
}
|
|
||||||
@ -1,245 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_mat_mult_kernel_s8_s16.c
|
|
||||||
* Description: Matrix-multiplication function for convolution
|
|
||||||
*
|
|
||||||
* $Date: 14. December 2021
|
|
||||||
* $Revision: V.1.1.0
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Matrix-multiplication function for convolution with per-channel requantization.
|
|
||||||
*
|
|
||||||
* Refer to the header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
q7_t *arm_nn_mat_mult_kernel_s8_s16(const q7_t *input_a,
|
|
||||||
const q15_t *input_b,
|
|
||||||
const uint16_t output_ch,
|
|
||||||
const int32_t *out_shift,
|
|
||||||
const int32_t *out_mult,
|
|
||||||
const int32_t out_offset,
|
|
||||||
const int16_t activation_min,
|
|
||||||
const int16_t activation_max,
|
|
||||||
const uint16_t num_col_a,
|
|
||||||
const int32_t *const output_bias,
|
|
||||||
q7_t *out_0)
|
|
||||||
{
|
|
||||||
#if !defined(ARM_MATH_MVEI)
|
|
||||||
/* set up the second output pointers */
|
|
||||||
q7_t *out_1 = out_0 + output_ch;
|
|
||||||
const int32_t *bias = output_bias;
|
|
||||||
|
|
||||||
uint16_t row_count = output_ch / 2;
|
|
||||||
const q7_t *ip_a0 = input_a;
|
|
||||||
/* this loop over rows in A */
|
|
||||||
while (row_count)
|
|
||||||
{
|
|
||||||
/* setup pointers for B */
|
|
||||||
const q15_t *ip_b0 = input_b;
|
|
||||||
const q15_t *ip_b1 = ip_b0 + num_col_a;
|
|
||||||
|
|
||||||
/* align the second pointer for A */
|
|
||||||
const q7_t *ip_a1 = ip_a0 + num_col_a;
|
|
||||||
|
|
||||||
q31_t ch_0_out_0 = 0;
|
|
||||||
q31_t ch_0_out_1 = 0;
|
|
||||||
q31_t ch_1_out_0 = 0;
|
|
||||||
q31_t ch_1_out_1 = 0;
|
|
||||||
/* Init accumulator with bias for channel N and N + 1 */
|
|
||||||
if (bias)
|
|
||||||
{
|
|
||||||
ch_0_out_0 = *bias;
|
|
||||||
ch_0_out_1 = *bias++;
|
|
||||||
ch_1_out_0 = *bias;
|
|
||||||
ch_1_out_1 = *bias++;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_DSP)
|
|
||||||
uint16_t col_count = num_col_a / 4;
|
|
||||||
/* accumulate over the vector */
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q31_t a01, a02, a11, a12;
|
|
||||||
q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
|
|
||||||
q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
|
|
||||||
|
|
||||||
ip_a0 = read_and_pad(ip_a0, &a01, &a02);
|
|
||||||
ip_a1 = read_and_pad(ip_a1, &a11, &a12);
|
|
||||||
|
|
||||||
ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0);
|
|
||||||
ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1);
|
|
||||||
ch_1_out_0 = __SMLAD(a11, b0, ch_1_out_0);
|
|
||||||
ch_1_out_1 = __SMLAD(a11, b1, ch_1_out_1);
|
|
||||||
|
|
||||||
b0 = arm_nn_read_q15x2_ia(&ip_b0);
|
|
||||||
b1 = arm_nn_read_q15x2_ia(&ip_b1);
|
|
||||||
|
|
||||||
ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0);
|
|
||||||
ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1);
|
|
||||||
ch_1_out_0 = __SMLAD(a12, b0, ch_1_out_0);
|
|
||||||
ch_1_out_1 = __SMLAD(a12, b1, ch_1_out_1);
|
|
||||||
|
|
||||||
col_count--;
|
|
||||||
} /* while over col_count */
|
|
||||||
col_count = num_col_a & 0x3;
|
|
||||||
#else
|
|
||||||
uint16_t col_count = num_col_a;
|
|
||||||
#endif
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q7_t a0 = *ip_a0++;
|
|
||||||
q15_t b0 = *ip_b0++;
|
|
||||||
q7_t a1 = *ip_a1++;
|
|
||||||
q15_t b1 = *ip_b1++;
|
|
||||||
|
|
||||||
ch_0_out_0 += a0 * b0;
|
|
||||||
ch_0_out_1 += a0 * b1;
|
|
||||||
ch_1_out_0 += a1 * b0;
|
|
||||||
ch_1_out_1 += a1 * b1;
|
|
||||||
col_count--;
|
|
||||||
} /* while over col_count */
|
|
||||||
|
|
||||||
ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
|
|
||||||
ch_0_out_0 += out_offset;
|
|
||||||
ch_0_out_0 = MAX(ch_0_out_0, activation_min);
|
|
||||||
ch_0_out_0 = MIN(ch_0_out_0, activation_max);
|
|
||||||
*out_0++ = (q7_t)ch_0_out_0;
|
|
||||||
|
|
||||||
ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
|
|
||||||
ch_0_out_1 += out_offset;
|
|
||||||
ch_0_out_1 = MAX(ch_0_out_1, activation_min);
|
|
||||||
ch_0_out_1 = MIN(ch_0_out_1, activation_max);
|
|
||||||
*out_1++ = (q7_t)ch_0_out_1;
|
|
||||||
out_mult++;
|
|
||||||
out_shift++;
|
|
||||||
|
|
||||||
ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift);
|
|
||||||
ch_1_out_0 += out_offset;
|
|
||||||
ch_1_out_0 = MAX(ch_1_out_0, activation_min);
|
|
||||||
ch_1_out_0 = MIN(ch_1_out_0, activation_max);
|
|
||||||
*out_0++ = (q7_t)ch_1_out_0;
|
|
||||||
|
|
||||||
ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift);
|
|
||||||
ch_1_out_1 += out_offset;
|
|
||||||
ch_1_out_1 = MAX(ch_1_out_1, activation_min);
|
|
||||||
ch_1_out_1 = MIN(ch_1_out_1, activation_max);
|
|
||||||
*out_1++ = (q7_t)ch_1_out_1;
|
|
||||||
out_mult++;
|
|
||||||
out_shift++;
|
|
||||||
|
|
||||||
/* skip row */
|
|
||||||
ip_a0 += num_col_a;
|
|
||||||
row_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* compute the last odd numbered row if any */
|
|
||||||
if (output_ch & 0x1)
|
|
||||||
{
|
|
||||||
/* setup pointers for B */
|
|
||||||
const q15_t *ip_b0 = input_b;
|
|
||||||
const q15_t *ip_b1 = ip_b0 + num_col_a;
|
|
||||||
|
|
||||||
q31_t ch_0_out_0 = 0;
|
|
||||||
q31_t ch_0_out_1 = 0;
|
|
||||||
|
|
||||||
/* load the bias */
|
|
||||||
if (bias)
|
|
||||||
{
|
|
||||||
ch_0_out_0 = *bias;
|
|
||||||
ch_0_out_1 = *bias++;
|
|
||||||
}
|
|
||||||
|
|
||||||
#if defined(ARM_MATH_DSP)
|
|
||||||
uint16_t col_count = num_col_a >> 2;
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q31_t a01, a02;
|
|
||||||
q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
|
|
||||||
q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
|
|
||||||
|
|
||||||
ip_a0 = read_and_pad(ip_a0, &a01, &a02);
|
|
||||||
|
|
||||||
ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0);
|
|
||||||
ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1);
|
|
||||||
|
|
||||||
b0 = arm_nn_read_q15x2_ia(&ip_b0);
|
|
||||||
b1 = arm_nn_read_q15x2_ia(&ip_b1);
|
|
||||||
ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0);
|
|
||||||
ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1);
|
|
||||||
|
|
||||||
col_count--;
|
|
||||||
}
|
|
||||||
col_count = num_col_a & 0x3;
|
|
||||||
#else
|
|
||||||
uint16_t col_count = num_col_a;
|
|
||||||
#endif
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q7_t a0 = *ip_a0++;
|
|
||||||
q15_t b0 = *ip_b0++;
|
|
||||||
q15_t b1 = *ip_b1++;
|
|
||||||
|
|
||||||
ch_0_out_0 += a0 * b0;
|
|
||||||
ch_0_out_1 += a0 * b1;
|
|
||||||
col_count--;
|
|
||||||
}
|
|
||||||
ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
|
|
||||||
ch_0_out_0 += out_offset;
|
|
||||||
ch_0_out_0 = MAX(ch_0_out_0, activation_min);
|
|
||||||
ch_0_out_0 = MIN(ch_0_out_0, activation_max);
|
|
||||||
*out_0++ = (q7_t)ch_0_out_0;
|
|
||||||
|
|
||||||
ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
|
|
||||||
ch_0_out_1 += out_offset;
|
|
||||||
ch_0_out_1 = MAX(ch_0_out_1, activation_min);
|
|
||||||
ch_0_out_1 = MIN(ch_0_out_1, activation_max);
|
|
||||||
*out_1++ = (q7_t)ch_0_out_1;
|
|
||||||
out_mult++;
|
|
||||||
out_shift++;
|
|
||||||
}
|
|
||||||
|
|
||||||
out_0 += output_ch;
|
|
||||||
|
|
||||||
/* return the new output pointer with offset */
|
|
||||||
return out_0;
|
|
||||||
#else
|
|
||||||
(void)input_a;
|
|
||||||
(void)input_b;
|
|
||||||
(void)output_ch;
|
|
||||||
(void)out_shift;
|
|
||||||
(void)out_mult;
|
|
||||||
(void)out_offset;
|
|
||||||
(void)activation_min;
|
|
||||||
(void)activation_max;
|
|
||||||
(void)num_col_a;
|
|
||||||
(void)output_bias;
|
|
||||||
(void)out_0;
|
|
||||||
/* To be completed */
|
|
||||||
return NULL;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
@ -1,201 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2020 Arm Limited or its affiliates. All rights reserved.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_mat_mult_kernel_s8_s16_reordered.c
|
|
||||||
* Description: Matrix-multiplication function for convolution with reordered columns
|
|
||||||
*
|
|
||||||
* $Date: 09. October 2020
|
|
||||||
* $Revision: V.1.0.3
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnfunctions.h"
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Matrix-multiplication with re-ordered input and bias inputs for convolution with per-channel
|
|
||||||
* requantization. The re-ordering is a consequence of the sign extension being done by the SXTB16 instruction.
|
|
||||||
*
|
|
||||||
* Refer header file for details. This function differs from arm_nn_mat_mult_kernel_s8_s16(), in that it uses
|
|
||||||
* read_and_pad_reordered() instead of read_and_pad(). Investigating the cycles impact and
|
|
||||||
* unifying these two functions is a potential future improvement.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
q7_t *arm_nn_mat_mult_kernel_s8_s16_reordered(const q7_t *input_a,
|
|
||||||
const q15_t *input_b,
|
|
||||||
const uint16_t output_ch,
|
|
||||||
const int32_t *out_shift,
|
|
||||||
const int32_t *out_mult,
|
|
||||||
const int32_t out_offset,
|
|
||||||
const int16_t activation_min,
|
|
||||||
const int16_t activation_max,
|
|
||||||
const uint16_t num_col_a,
|
|
||||||
const int32_t *const output_bias,
|
|
||||||
q7_t *out_0)
|
|
||||||
{
|
|
||||||
#if defined(ARM_MATH_DSP)
|
|
||||||
/* set up the second output pointers */
|
|
||||||
q7_t *out_1 = out_0 + output_ch;
|
|
||||||
const int32_t *bias = output_bias;
|
|
||||||
|
|
||||||
uint16_t row_count = output_ch / 2;
|
|
||||||
const q7_t *ip_a0 = input_a;
|
|
||||||
/* this loop over rows in A */
|
|
||||||
while (row_count)
|
|
||||||
{
|
|
||||||
/* setup pointers for B */
|
|
||||||
const q15_t *ip_b0 = input_b;
|
|
||||||
const q15_t *ip_b1 = ip_b0 + num_col_a;
|
|
||||||
|
|
||||||
/* align the second pointer for A */
|
|
||||||
const q7_t *ip_a1 = ip_a0 + num_col_a;
|
|
||||||
|
|
||||||
/* Init accumulator with bias for channel N and N + 1 */
|
|
||||||
q31_t ch_0_out_0 = *bias;
|
|
||||||
q31_t ch_0_out_1 = *bias++;
|
|
||||||
q31_t ch_1_out_0 = *bias;
|
|
||||||
q31_t ch_1_out_1 = *bias++;
|
|
||||||
|
|
||||||
uint16_t col_count = num_col_a / 4;
|
|
||||||
/* accumulate over the vector */
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q31_t a01, a02, a11, a12;
|
|
||||||
q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
|
|
||||||
q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
|
|
||||||
|
|
||||||
ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
|
|
||||||
ip_a1 = read_and_pad_reordered(ip_a1, &a11, &a12);
|
|
||||||
|
|
||||||
ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0);
|
|
||||||
ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1);
|
|
||||||
ch_1_out_0 = __SMLAD(a11, b0, ch_1_out_0);
|
|
||||||
ch_1_out_1 = __SMLAD(a11, b1, ch_1_out_1);
|
|
||||||
|
|
||||||
b0 = arm_nn_read_q15x2_ia(&ip_b0);
|
|
||||||
b1 = arm_nn_read_q15x2_ia(&ip_b1);
|
|
||||||
|
|
||||||
ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0);
|
|
||||||
ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1);
|
|
||||||
ch_1_out_0 = __SMLAD(a12, b0, ch_1_out_0);
|
|
||||||
ch_1_out_1 = __SMLAD(a12, b1, ch_1_out_1);
|
|
||||||
|
|
||||||
col_count--;
|
|
||||||
} /* while over col_count */
|
|
||||||
|
|
||||||
ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
|
|
||||||
ch_0_out_0 += out_offset;
|
|
||||||
ch_0_out_0 = MAX(ch_0_out_0, activation_min);
|
|
||||||
ch_0_out_0 = MIN(ch_0_out_0, activation_max);
|
|
||||||
*out_0++ = (q7_t)ch_0_out_0;
|
|
||||||
|
|
||||||
ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
|
|
||||||
ch_0_out_1 += out_offset;
|
|
||||||
ch_0_out_1 = MAX(ch_0_out_1, activation_min);
|
|
||||||
ch_0_out_1 = MIN(ch_0_out_1, activation_max);
|
|
||||||
*out_1++ = (q7_t)ch_0_out_1;
|
|
||||||
out_mult++;
|
|
||||||
out_shift++;
|
|
||||||
|
|
||||||
ch_1_out_0 = arm_nn_requantize(ch_1_out_0, *out_mult, *out_shift);
|
|
||||||
ch_1_out_0 += out_offset;
|
|
||||||
ch_1_out_0 = MAX(ch_1_out_0, activation_min);
|
|
||||||
ch_1_out_0 = MIN(ch_1_out_0, activation_max);
|
|
||||||
*out_0++ = (q7_t)ch_1_out_0;
|
|
||||||
|
|
||||||
ch_1_out_1 = arm_nn_requantize(ch_1_out_1, *out_mult, *out_shift);
|
|
||||||
ch_1_out_1 += out_offset;
|
|
||||||
ch_1_out_1 = MAX(ch_1_out_1, activation_min);
|
|
||||||
ch_1_out_1 = MIN(ch_1_out_1, activation_max);
|
|
||||||
*out_1++ = (q7_t)ch_1_out_1;
|
|
||||||
out_mult++;
|
|
||||||
out_shift++;
|
|
||||||
|
|
||||||
/* skip row */
|
|
||||||
ip_a0 += num_col_a;
|
|
||||||
row_count--;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (output_ch & 1)
|
|
||||||
{
|
|
||||||
/* setup pointers for B */
|
|
||||||
const q15_t *ip_b0 = input_b;
|
|
||||||
const q15_t *ip_b1 = ip_b0 + num_col_a;
|
|
||||||
|
|
||||||
/* Init accumulator with bias for channel N + 1 */
|
|
||||||
q31_t ch_0_out_0 = *bias;
|
|
||||||
q31_t ch_0_out_1 = ch_0_out_0;
|
|
||||||
|
|
||||||
int32_t col_count = num_col_a / 4;
|
|
||||||
while (col_count)
|
|
||||||
{
|
|
||||||
q31_t a01, a02;
|
|
||||||
q31_t b0 = arm_nn_read_q15x2_ia(&ip_b0);
|
|
||||||
q31_t b1 = arm_nn_read_q15x2_ia(&ip_b1);
|
|
||||||
|
|
||||||
ip_a0 = read_and_pad_reordered(ip_a0, &a01, &a02);
|
|
||||||
|
|
||||||
ch_0_out_0 = __SMLAD(a01, b0, ch_0_out_0);
|
|
||||||
ch_0_out_1 = __SMLAD(a01, b1, ch_0_out_1);
|
|
||||||
|
|
||||||
b0 = arm_nn_read_q15x2_ia(&ip_b0);
|
|
||||||
b1 = arm_nn_read_q15x2_ia(&ip_b1);
|
|
||||||
|
|
||||||
ch_0_out_0 = __SMLAD(a02, b0, ch_0_out_0);
|
|
||||||
ch_0_out_1 = __SMLAD(a02, b1, ch_0_out_1);
|
|
||||||
|
|
||||||
col_count--;
|
|
||||||
} /* while over col_count */
|
|
||||||
|
|
||||||
ch_0_out_0 = arm_nn_requantize(ch_0_out_0, *out_mult, *out_shift);
|
|
||||||
ch_0_out_0 += out_offset;
|
|
||||||
ch_0_out_0 = MAX(ch_0_out_0, activation_min);
|
|
||||||
ch_0_out_0 = MIN(ch_0_out_0, activation_max);
|
|
||||||
*out_0++ = (q7_t)ch_0_out_0;
|
|
||||||
|
|
||||||
ch_0_out_1 = arm_nn_requantize(ch_0_out_1, *out_mult, *out_shift);
|
|
||||||
ch_0_out_1 += out_offset;
|
|
||||||
ch_0_out_1 = MAX(ch_0_out_1, activation_min);
|
|
||||||
ch_0_out_1 = MIN(ch_0_out_1, activation_max);
|
|
||||||
*out_1++ = (q7_t)ch_0_out_1;
|
|
||||||
}
|
|
||||||
|
|
||||||
out_0 += output_ch;
|
|
||||||
|
|
||||||
/* return the new output pointer with offset */
|
|
||||||
return out_0;
|
|
||||||
#else
|
|
||||||
(void)input_a;
|
|
||||||
(void)input_b;
|
|
||||||
(void)output_ch;
|
|
||||||
(void)out_shift;
|
|
||||||
(void)out_mult;
|
|
||||||
(void)out_offset;
|
|
||||||
(void)activation_min;
|
|
||||||
(void)activation_max;
|
|
||||||
(void)num_col_a;
|
|
||||||
(void)output_bias;
|
|
||||||
(void)out_0;
|
|
||||||
/* To be completed */
|
|
||||||
return NULL;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
@ -1,180 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (C) 2010-2021 Arm Limited or its affiliates.
|
|
||||||
*
|
|
||||||
* SPDX-License-Identifier: Apache-2.0
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
* not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
* Project: CMSIS NN Library
|
|
||||||
* Title: arm_nn_mat_mult_s8.c
|
|
||||||
* Description: General Matrix-multiplication function
|
|
||||||
*
|
|
||||||
* $Date: 27. October 2021
|
|
||||||
* $Revision: V.2.0.6
|
|
||||||
*
|
|
||||||
* Target Processor: Cortex-M cores
|
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_nnsupportfunctions.h"
|
|
||||||
|
|
||||||
/*
|
|
||||||
* s8 General matrix multiplication function with per-channel requantization for up to 4 column batches.
|
|
||||||
*
|
|
||||||
* Refer to the header file for details.
|
|
||||||
*
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
 * s8 matrix multiplication with per-channel requantization for 1 to 4 column batches.
 *
 * For every output channel, the dot product of one row of input_row with each
 * column batch of input_col (with col_offset added to every column element) is
 * accumulated, optionally biased, requantized with that channel's multiplier
 * and shift, offset by out_offset, clamped to [activation_min, activation_max]
 * and stored as one byte.
 *
 * @param input_row       Row operand; row i_out_ch starts at input_row + i_out_ch * row_len.
 * @param input_col       Column operand; batch b starts at input_col + b * row_len.
 * @param output_ch       Number of output channels (rows of input_row).
 * @param col_batches     Number of column batches. Exactly 4 takes the 4-wide path;
 *                        1 to 3 are processed one batch at a time. Values above 4
 *                        are not processed (see the loop bounds in the else branch).
 * @param output_shift    Per-channel requantization shift, indexed by output channel.
 * @param output_mult     Per-channel requantization multiplier, indexed by output channel.
 * @param out_offset      Value added to each result after requantization.
 * @param col_offset      Value added to each element read from input_col.
 * @param row_offset      Unused by this implementation.
 * @param activation_min  Lower clamp applied to each result.
 * @param activation_max  Upper clamp applied to each result.
 * @param row_len         Number of elements in each row / column batch.
 * @param bias            Per-channel bias, or NULL to skip the bias addition.
 * @param out             Output buffer; batch b, channel c is written to out[b * output_ch + c].
 *
 * @return With ARM_MATH_MVEI defined: out advanced past the written results
 *         (by 4 * output_ch on the 4-batch path, by output_ch per processed batch otherwise).
 *         Without ARM_MATH_MVEI: NULL, and no computation is performed.
 */
q7_t *arm_nn_mat_mult_s8(const q7_t *input_row,
                         const q7_t *input_col,
                         const uint16_t output_ch,
                         const uint16_t col_batches,
                         const int32_t *output_shift,
                         const int32_t *output_mult,
                         const int32_t out_offset,
                         const int32_t col_offset,
                         const int32_t row_offset,
                         const int16_t activation_min,
                         const int16_t activation_max,
                         const uint16_t row_len,
                         const int32_t *const bias,
                         q7_t *out)
{
#if defined(ARM_MATH_MVEI)
    (void)row_offset;

    /* Number of 8-lane iterations needed to cover row_len elements (rounded up). */
    const int32_t row_loop_cnt = (row_len + 7) / 8;

    if (col_batches == 4)
    {
        for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
        {
            int32_t row_len_tmp = row_len;
            const int8_t *ip_r0 = input_row + (i_out_ch * row_len);
            const int8_t *ip_c0 = input_col;
            const int8_t *ip_c1 = input_col + row_len;
            const int8_t *ip_c2 = input_col + (2 * row_len);
            const int8_t *ip_c3 = input_col + (3 * row_len);

            int32_t acc_0 = 0;
            int32_t acc_1 = 0;
            int32_t acc_2 = 0;
            int32_t acc_3 = 0;

            for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++)
            {
                /* Tail predicate: only lanes below the remaining element count are active. */
                mve_pred16_t p = vctp16q((uint32_t)row_len_tmp);
                const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p);
                row_len_tmp -= 8;

                /*
                 * Predicated (zeroing) loads keep the tail iteration from reading
                 * past the end of a column. Inactive lanes are ignored by the
                 * predicated multiply-accumulate below, so results are unaffected.
                 */
                int16x8_t c0 = vldrbq_z_s16(ip_c0, p);
                ip_c0 += 8;
                c0 = vaddq_s16(c0, offset);

                int16x8_t c1 = vldrbq_z_s16(ip_c1, p);
                ip_c1 += 8;
                c1 = vaddq_s16(c1, offset);

                int16x8_t c2 = vldrbq_z_s16(ip_c2, p);
                ip_c2 += 8;
                c2 = vaddq_s16(c2, offset);

                int16x8_t c3 = vldrbq_z_s16(ip_c3, p);
                ip_c3 += 8;
                c3 = vaddq_s16(c3, offset);

                int16x8_t r0 = vldrbq_z_s16(ip_r0, p);
                ip_r0 += 8;

                acc_0 = vmladavaq_p_s16(acc_0, r0, c0, p);
                acc_1 = vmladavaq_p_s16(acc_1, r0, c1, p);
                acc_2 = vmladavaq_p_s16(acc_2, r0, c2, p);
                acc_3 = vmladavaq_p_s16(acc_3, r0, c3, p);
            }

            /* One lane per column batch for the current output channel. */
            int32x4_t res = {acc_0, acc_1, acc_2, acc_3};
            if (bias)
            {
                res = vaddq_n_s32(res, bias[i_out_ch]);
            }
            res = arm_requantize_mve(res, output_mult[i_out_ch], output_shift[i_out_ch]);
            res = vaddq_n_s32(res, out_offset);

            res = vmaxq_s32(res, vdupq_n_s32(activation_min));
            res = vminq_s32(res, vdupq_n_s32(activation_max));

            /* Lane b is stored as a byte at out[i_out_ch + b * output_ch]. */
            const uint32x4_t scatter_offset = {0, output_ch, output_ch * 2, output_ch * 3};
            vstrbq_scatter_offset_s32(&out[i_out_ch], scatter_offset, res);
        }
        out += 4 * output_ch;
    }
    else
    {
        /*
         * For col_batches in 1..3 this runs i_col_batch = 0 .. col_batches - 1.
         * For larger values the start bound is never below the end bound, so
         * no batch is processed and out is returned unchanged.
         */
        for (int i_col_batch = (col_batches & ~0x3); i_col_batch < (col_batches & 0x3); i_col_batch++)
        {
            for (int i_out_ch = 0; i_out_ch < output_ch; i_out_ch++)
            {
                int32_t row_len_tmp = row_len;

                const int8_t *ip_r0 = input_row + (i_out_ch * row_len);
                const int8_t *ip_c0 = input_col + (i_col_batch * row_len);
                int32_t acc_0 = 0;

                for (int i_row_loop = 0; i_row_loop < row_loop_cnt; i_row_loop++)
                {
                    /* Tail predicate: only lanes below the remaining element count are active. */
                    const mve_pred16_t p = vctp16q((uint32_t)row_len_tmp);
                    const int16x8_t offset = vdupq_m_n_s16(vuninitializedq_s16(), col_offset, p);
                    row_len_tmp -= 8;

                    /* Predicated load: do not read past the end of the column on the tail. */
                    int16x8_t c0 = vldrbq_z_s16(ip_c0, p);
                    ip_c0 += 8;
                    c0 = vaddq_s16(c0, offset);

                    int16x8_t r0 = vldrbq_z_s16(ip_r0, p);
                    ip_r0 += 8;
                    acc_0 = vmladavaq_p_s16(acc_0, r0, c0, p);
                }

                if (bias)
                {
                    acc_0 += bias[i_out_ch];
                }
                acc_0 = arm_nn_requantize(acc_0, output_mult[i_out_ch], output_shift[i_out_ch]);
                acc_0 += out_offset;
                acc_0 = MAX(acc_0, activation_min);
                acc_0 = MIN(acc_0, activation_max);
                out[i_out_ch] = (q7_t)acc_0;
            }
            out += output_ch;
        }
    }
    return out;

#else
    /* No MVE support: nothing is computed; silence unused-parameter warnings. */
    (void)input_row;
    (void)input_col;
    (void)output_ch;
    (void)col_batches;
    (void)output_shift;
    (void)output_mult;
    (void)out_offset;
    (void)col_offset;
    (void)row_offset;
    (void)activation_min;
    (void)activation_max;
    (void)row_len;
    (void)bias;
    (void)out;
    return NULL;
#endif
}
|
|
||||||
@ -1,21 +0,0 @@
|
|||||||
#
|
|
||||||
# Copyright (c) 2019-2021 Arm Limited.
|
|
||||||
#
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the License); you may
|
|
||||||
# not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
|
||||||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
#
|
|
||||||
|
|
||||||
# Collect every file in this directory whose name ends in "_s8.c" into SRC.
file(GLOB SRC "./*_s8.c")
|
|
||||||
# Add the files listed in SRC, plus arm_fully_connected_s16.c, as PRIVATE sources of the cmsis-nn target.
target_sources(cmsis-nn PRIVATE ${SRC} arm_fully_connected_s16.c)
|
|
||||||
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user