Compare commits
5 Commits
47fc7071bf
...
13c2e97ae2
Author | SHA1 | Date |
---|---|---|
Solomon Peachy | 13c2e97ae2 | |
Solomon Peachy | 772cdffa1e | |
Solomon Peachy | 3a56bc49de | |
Solomon Peachy | f0b846c40a | |
Solomon Peachy | dfd5c4ed80 |
4
Makefile
4
Makefile
|
@ -4,6 +4,10 @@
|
||||||
|
|
||||||
-include Makefile.opts
|
-include Makefile.opts
|
||||||
|
|
||||||
|
ifeq ($(MCU),)
|
||||||
|
$(error Must set 'MCU' in Makefile.opts; see 'BUILD' file)
|
||||||
|
endif
|
||||||
|
|
||||||
# Toolchain - Don't change this unless you are sure what you're doing!
|
# Toolchain - Don't change this unless you are sure what you're doing!
|
||||||
TOOLCHAIN_CONFIG=toolchain-$(MCU_CORE).config
|
TOOLCHAIN_CONFIG=toolchain-$(MCU_CORE).config
|
||||||
TOOLCHAIN_SRC_DIR=crosstool-ng
|
TOOLCHAIN_SRC_DIR=crosstool-ng
|
||||||
|
|
Binary file not shown.
|
@ -1,165 +1,153 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_abs_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point vector absolute value
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_abs_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Vector absolute value.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include <math.h>
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
/**
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @ingroup groupMath
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @defgroup BasicAbs Vector Absolute Value
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*
|
||||||
* ---------------------------------------------------------------------------- */
|
* Computes the absolute value of a vector on an element-by-element basis.
|
||||||
|
*
|
||||||
#include "arm_math.h"
|
* <pre>
|
||||||
#include <math.h>
|
* pDst[n] = abs(pSrc[n]), 0 <= n < blockSize.
|
||||||
|
* </pre>
|
||||||
/**
|
*
|
||||||
* @ingroup groupMath
|
* The functions support in-place computation allowing the source and
|
||||||
*/
|
* destination pointers to reference the same memory buffer.
|
||||||
|
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||||
/**
|
*/
|
||||||
* @defgroup BasicAbs Vector Absolute Value
|
|
||||||
*
|
/**
|
||||||
* Computes the absolute value of a vector on an element-by-element basis.
|
* @addtogroup BasicAbs
|
||||||
*
|
* @{
|
||||||
* <pre>
|
*/
|
||||||
* pDst[n] = abs(pSrc[n]), 0 <= n < blockSize.
|
|
||||||
* </pre>
|
/**
|
||||||
*
|
* @brief Floating-point vector absolute value.
|
||||||
* The functions support in-place computation allowing the source and
|
* @param[in] *pSrc points to the input buffer
|
||||||
* destination pointers to reference the same memory buffer.
|
* @param[out] *pDst points to the output buffer
|
||||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
* @param[in] blockSize number of samples in each vector
|
||||||
*/
|
* @return none.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @addtogroup BasicAbs
|
void arm_abs_f32(
|
||||||
* @{
|
float32_t * pSrc,
|
||||||
*/
|
float32_t * pDst,
|
||||||
|
uint32_t blockSize)
|
||||||
/**
|
{
|
||||||
* @brief Floating-point vector absolute value.
|
uint32_t blkCnt; /* loop counter */
|
||||||
* @param[in] *pSrc points to the input buffer
|
|
||||||
* @param[out] *pDst points to the output buffer
|
#if defined (ARM_MATH_DSP)
|
||||||
* @param[in] blockSize number of samples in each vector
|
|
||||||
* @return none.
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
*/
|
float32_t in1, in2, in3, in4; /* temporary variables */
|
||||||
|
|
||||||
void arm_abs_f32(
|
/*loop Unrolling */
|
||||||
float32_t * pSrc,
|
blkCnt = blockSize >> 2U;
|
||||||
float32_t * pDst,
|
|
||||||
uint32_t blockSize)
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
{
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
uint32_t blkCnt; /* loop counter */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C = |A| */
|
||||||
|
/* Calculate absolute and then store the results in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* read sample from source */
|
||||||
float32_t in1, in2, in3, in4; /* temporary variables */
|
in1 = *pSrc;
|
||||||
|
in2 = *(pSrc + 1);
|
||||||
/*loop Unrolling */
|
in3 = *(pSrc + 2);
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
/* find absolute value */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
in1 = fabsf(in1);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
/* read sample from source */
|
||||||
{
|
in4 = *(pSrc + 3);
|
||||||
/* C = |A| */
|
|
||||||
/* Calculate absolute and then store the results in the destination buffer. */
|
/* find absolute value */
|
||||||
/* read sample from source */
|
in2 = fabsf(in2);
|
||||||
in1 = *pSrc;
|
|
||||||
in2 = *(pSrc + 1);
|
/* store result to destination */
|
||||||
in3 = *(pSrc + 2);
|
*pDst = in1;
|
||||||
|
|
||||||
/* find absolute value */
|
/* find absolute value */
|
||||||
in1 = fabsf(in1);
|
in3 = fabsf(in3);
|
||||||
|
|
||||||
/* read sample from source */
|
/* find absolute value */
|
||||||
in4 = *(pSrc + 3);
|
in4 = fabsf(in4);
|
||||||
|
|
||||||
/* find absolute value */
|
/* store result to destination */
|
||||||
in2 = fabsf(in2);
|
*(pDst + 1) = in2;
|
||||||
|
|
||||||
/* store result to destination */
|
/* store result to destination */
|
||||||
*pDst = in1;
|
*(pDst + 2) = in3;
|
||||||
|
|
||||||
/* find absolute value */
|
/* store result to destination */
|
||||||
in3 = fabsf(in3);
|
*(pDst + 3) = in4;
|
||||||
|
|
||||||
/* find absolute value */
|
|
||||||
in4 = fabsf(in4);
|
/* Update source pointer to process next samples */
|
||||||
|
pSrc += 4U;
|
||||||
/* store result to destination */
|
|
||||||
*(pDst + 1) = in2;
|
/* Update destination pointer to process next samples */
|
||||||
|
pDst += 4U;
|
||||||
/* store result to destination */
|
|
||||||
*(pDst + 2) = in3;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* store result to destination */
|
}
|
||||||
*(pDst + 3) = in4;
|
|
||||||
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
/* Update source pointer to process next samples */
|
blkCnt = blockSize % 0x4U;
|
||||||
pSrc += 4u;
|
|
||||||
|
#else
|
||||||
/* Update destination pointer to process next samples */
|
|
||||||
pDst += 4u;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = blockSize % 0x4u;
|
{
|
||||||
|
/* C = |A| */
|
||||||
#else
|
/* Calculate absolute and then store the results in the destination buffer. */
|
||||||
|
*pDst++ = fabsf(*pSrc++);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Initialize blkCnt with number of samples */
|
blkCnt--;
|
||||||
blkCnt = blockSize;
|
}
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
/**
|
||||||
while(blkCnt > 0u)
|
* @} end of BasicAbs group
|
||||||
{
|
*/
|
||||||
/* C = |A| */
|
|
||||||
/* Calculate absolute and then store the results in the destination buffer. */
|
|
||||||
*pDst++ = fabsf(*pSrc++);
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicAbs group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,179 +1,167 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_abs_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 vector absolute value
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_abs_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 vector absolute value.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicAbs
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q15 vector absolute value.
|
||||||
|
* @param[in] *pSrc points to the input buffer
|
||||||
/**
|
* @param[out] *pDst points to the output buffer
|
||||||
* @ingroup groupMath
|
* @param[in] blockSize number of samples in each vector
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @addtogroup BasicAbs
|
* \par
|
||||||
* @{
|
* The function uses saturating arithmetic.
|
||||||
*/
|
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @brief Q15 vector absolute value.
|
void arm_abs_q15(
|
||||||
* @param[in] *pSrc points to the input buffer
|
q15_t * pSrc,
|
||||||
* @param[out] *pDst points to the output buffer
|
q15_t * pDst,
|
||||||
* @param[in] blockSize number of samples in each vector
|
uint32_t blockSize)
|
||||||
* @return none.
|
{
|
||||||
*
|
uint32_t blkCnt; /* loop counter */
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
|
||||||
* \par
|
#if defined (ARM_MATH_DSP)
|
||||||
* The function uses saturating arithmetic.
|
__SIMD32_TYPE *simd;
|
||||||
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
|
||||||
void arm_abs_q15(
|
q15_t in1; /* Input value1 */
|
||||||
q15_t * pSrc,
|
q15_t in2; /* Input value2 */
|
||||||
q15_t * pDst,
|
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/*loop Unrolling */
|
||||||
uint32_t blkCnt; /* loop counter */
|
blkCnt = blockSize >> 2U;
|
||||||
|
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
__SIMD32_TYPE *simd;
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
simd = __SIMD32_CONST(pDst);
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
q15_t in1; /* Input value1 */
|
/* C = |A| */
|
||||||
q15_t in2; /* Input value2 */
|
/* Read two inputs */
|
||||||
|
in1 = *pSrc++;
|
||||||
|
in2 = *pSrc++;
|
||||||
/*loop Unrolling */
|
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
/* Store the Absolute result in the destination buffer by packing the two values, in a single cycle */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
*simd++ =
|
||||||
simd = __SIMD32_CONST(pDst);
|
__PKHBT(((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)),
|
||||||
while(blkCnt > 0u)
|
((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)), 16);
|
||||||
{
|
|
||||||
/* C = |A| */
|
#else
|
||||||
/* Read two inputs */
|
|
||||||
in1 = *pSrc++;
|
|
||||||
in2 = *pSrc++;
|
*simd++ =
|
||||||
|
__PKHBT(((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)),
|
||||||
|
((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)), 16);
|
||||||
/* Store the Absolute result in the destination buffer by packing the two values, in a single cycle */
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
*simd++ =
|
|
||||||
__PKHBT(((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)),
|
in1 = *pSrc++;
|
||||||
((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)), 16);
|
in2 = *pSrc++;
|
||||||
|
|
||||||
#else
|
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
|
|
||||||
*simd++ =
|
*simd++ =
|
||||||
__PKHBT(((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)),
|
__PKHBT(((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)),
|
||||||
((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)), 16);
|
((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)), 16);
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
#else
|
||||||
|
|
||||||
in1 = *pSrc++;
|
|
||||||
in2 = *pSrc++;
|
*simd++ =
|
||||||
|
__PKHBT(((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)),
|
||||||
|
((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)), 16);
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
|
||||||
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
*simd++ =
|
|
||||||
__PKHBT(((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)),
|
/* Decrement the loop counter */
|
||||||
((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)), 16);
|
blkCnt--;
|
||||||
|
}
|
||||||
#else
|
pDst = (q15_t *)simd;
|
||||||
|
|
||||||
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
*simd++ =
|
** No loop unrolling is used. */
|
||||||
__PKHBT(((in2 > 0) ? in2 : (q15_t)__QSUB16(0, in2)),
|
blkCnt = blockSize % 0x4U;
|
||||||
((in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1)), 16);
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
{
|
||||||
|
/* C = |A| */
|
||||||
/* Decrement the loop counter */
|
/* Read the input */
|
||||||
blkCnt--;
|
in1 = *pSrc++;
|
||||||
}
|
|
||||||
pDst = (q15_t *)simd;
|
/* Calculate absolute value of input and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1);
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
/* Decrement the loop counter */
|
||||||
blkCnt = blockSize % 0x4u;
|
blkCnt--;
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
#else
|
||||||
/* C = |A| */
|
|
||||||
/* Read the input */
|
/* Run the below code for Cortex-M0 */
|
||||||
in1 = *pSrc++;
|
|
||||||
|
q15_t in; /* Temporary input variable */
|
||||||
/* Calculate absolute value of input and then store the result in the destination buffer. */
|
|
||||||
*pDst++ = (in1 > 0) ? in1 : (q15_t)__QSUB16(0, in1);
|
/* Initialize blkCnt with number of samples */
|
||||||
|
blkCnt = blockSize;
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
while (blkCnt > 0U)
|
||||||
}
|
{
|
||||||
|
/* C = |A| */
|
||||||
#else
|
/* Read the input */
|
||||||
|
in = *pSrc++;
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Calculate absolute value of input and then store the result in the destination buffer. */
|
||||||
q15_t in; /* Temporary input variable */
|
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
|
||||||
|
|
||||||
/* Initialize blkCnt with number of samples */
|
/* Decrement the loop counter */
|
||||||
blkCnt = blockSize;
|
blkCnt--;
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* C = |A| */
|
|
||||||
/* Read the input */
|
}
|
||||||
in = *pSrc++;
|
|
||||||
|
/**
|
||||||
/* Calculate absolute value of input and then store the result in the destination buffer. */
|
* @} end of BasicAbs group
|
||||||
*pDst++ = (in > 0) ? in : ((in == (q15_t) 0x8000) ? 0x7fff : -in);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicAbs group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,130 +1,118 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_abs_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 vector absolute value
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_abs_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 vector absolute value.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicAbs
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Q31 vector absolute value.
|
||||||
/**
|
* @param[in] *pSrc points to the input buffer
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output buffer
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup BasicAbs
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @brief Q31 vector absolute value.
|
void arm_abs_q31(
|
||||||
* @param[in] *pSrc points to the input buffer
|
q31_t * pSrc,
|
||||||
* @param[out] *pDst points to the output buffer
|
q31_t * pDst,
|
||||||
* @param[in] blockSize number of samples in each vector
|
uint32_t blockSize)
|
||||||
* @return none.
|
{
|
||||||
*
|
uint32_t blkCnt; /* loop counter */
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q31_t in; /* Input value */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q31_t in1, in2, in3, in4;
|
||||||
void arm_abs_q31(
|
|
||||||
q31_t * pSrc,
|
/*loop Unrolling */
|
||||||
q31_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blkCnt; /* loop counter */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
q31_t in; /* Input value */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C = |A| */
|
||||||
|
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
in1 = *pSrc++;
|
||||||
q31_t in1, in2, in3, in4;
|
in2 = *pSrc++;
|
||||||
|
in3 = *pSrc++;
|
||||||
/*loop Unrolling */
|
in4 = *pSrc++;
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
*pDst++ = (in1 > 0) ? in1 : (q31_t)__QSUB(0, in1);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*pDst++ = (in2 > 0) ? in2 : (q31_t)__QSUB(0, in2);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
*pDst++ = (in3 > 0) ? in3 : (q31_t)__QSUB(0, in3);
|
||||||
while(blkCnt > 0u)
|
*pDst++ = (in4 > 0) ? in4 : (q31_t)__QSUB(0, in4);
|
||||||
{
|
|
||||||
/* C = |A| */
|
/* Decrement the loop counter */
|
||||||
/* Calculate absolute of input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
|
blkCnt--;
|
||||||
in1 = *pSrc++;
|
}
|
||||||
in2 = *pSrc++;
|
|
||||||
in3 = *pSrc++;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
in4 = *pSrc++;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
*pDst++ = (in1 > 0) ? in1 : (q31_t)__QSUB(0, in1);
|
|
||||||
*pDst++ = (in2 > 0) ? in2 : (q31_t)__QSUB(0, in2);
|
#else
|
||||||
*pDst++ = (in3 > 0) ? in3 : (q31_t)__QSUB(0, in3);
|
|
||||||
*pDst++ = (in4 > 0) ? in4 : (q31_t)__QSUB(0, in4);
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = blockSize % 0x4u;
|
{
|
||||||
|
/* C = |A| */
|
||||||
#else
|
/* Calculate absolute value of the input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
|
||||||
|
in = *pSrc++;
|
||||||
/* Run the below code for Cortex-M0 */
|
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
|
||||||
|
|
||||||
/* Initialize blkCnt with number of samples */
|
/* Decrement the loop counter */
|
||||||
blkCnt = blockSize;
|
blkCnt--;
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/**
|
||||||
/* C = |A| */
|
* @} end of BasicAbs group
|
||||||
/* Calculate absolute value of the input (if -1 then saturated to 0x7fffffff) and then store the results in the destination buffer. */
|
*/
|
||||||
in = *pSrc++;
|
|
||||||
*pDst++ = (in > 0) ? in : ((in == INT32_MIN) ? INT32_MAX : -in);
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicAbs group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,157 +1,145 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_abs_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q7 vector absolute value
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_abs_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q7 vector absolute value.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicAbs
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q7 vector absolute value.
|
||||||
|
* @param[in] *pSrc points to the input buffer
|
||||||
/**
|
* @param[out] *pDst points to the output buffer
|
||||||
* @ingroup groupMath
|
* @param[in] blockSize number of samples in each vector
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* \par Conditions for optimum performance
|
||||||
* @addtogroup BasicAbs
|
* Input and output buffers should be aligned by 32-bit
|
||||||
* @{
|
*
|
||||||
*/
|
*
|
||||||
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
/**
|
* \par
|
||||||
* @brief Q7 vector absolute value.
|
* The function uses saturating arithmetic.
|
||||||
* @param[in] *pSrc points to the input buffer
|
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
|
||||||
* @param[out] *pDst points to the output buffer
|
*/
|
||||||
* @param[in] blockSize number of samples in each vector
|
|
||||||
* @return none.
|
void arm_abs_q7(
|
||||||
*
|
q7_t * pSrc,
|
||||||
* \par Conditions for optimum performance
|
q7_t * pDst,
|
||||||
* Input and output buffers should be aligned by 32-bit
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
*
|
uint32_t blkCnt; /* loop counter */
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q7_t in; /* Input value1 */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q31_t in1, in2, in3, in4; /* temporary input variables */
|
||||||
void arm_abs_q7(
|
q31_t out1, out2, out3, out4; /* temporary output variables */
|
||||||
q7_t * pSrc,
|
|
||||||
q7_t * pDst,
|
/*loop Unrolling */
|
||||||
uint32_t blockSize)
|
blkCnt = blockSize >> 2U;
|
||||||
{
|
|
||||||
uint32_t blkCnt; /* loop counter */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
q7_t in; /* Input value1 */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* C = |A| */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Read inputs */
|
||||||
q31_t in1, in2, in3, in4; /* temporary input variables */
|
in1 = (q31_t) * pSrc;
|
||||||
q31_t out1, out2, out3, out4; /* temporary output variables */
|
in2 = (q31_t) * (pSrc + 1);
|
||||||
|
in3 = (q31_t) * (pSrc + 2);
|
||||||
/*loop Unrolling */
|
|
||||||
blkCnt = blockSize >> 2u;
|
/* find absolute value */
|
||||||
|
out1 = (in1 > 0) ? in1 : (q31_t)__QSUB8(0, in1);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* read input */
|
||||||
while(blkCnt > 0u)
|
in4 = (q31_t) * (pSrc + 3);
|
||||||
{
|
|
||||||
/* C = |A| */
|
/* find absolute value */
|
||||||
/* Read inputs */
|
out2 = (in2 > 0) ? in2 : (q31_t)__QSUB8(0, in2);
|
||||||
in1 = (q31_t) * pSrc;
|
|
||||||
in2 = (q31_t) * (pSrc + 1);
|
/* store result to destination */
|
||||||
in3 = (q31_t) * (pSrc + 2);
|
*pDst = (q7_t) out1;
|
||||||
|
|
||||||
/* find absolute value */
|
/* find absolute value */
|
||||||
out1 = (in1 > 0) ? in1 : (q31_t)__QSUB8(0, in1);
|
out3 = (in3 > 0) ? in3 : (q31_t)__QSUB8(0, in3);
|
||||||
|
|
||||||
/* read input */
|
/* find absolute value */
|
||||||
in4 = (q31_t) * (pSrc + 3);
|
out4 = (in4 > 0) ? in4 : (q31_t)__QSUB8(0, in4);
|
||||||
|
|
||||||
/* find absolute value */
|
/* store result to destination */
|
||||||
out2 = (in2 > 0) ? in2 : (q31_t)__QSUB8(0, in2);
|
*(pDst + 1) = (q7_t) out2;
|
||||||
|
|
||||||
/* store result to destination */
|
/* store result to destination */
|
||||||
*pDst = (q7_t) out1;
|
*(pDst + 2) = (q7_t) out3;
|
||||||
|
|
||||||
/* find absolute value */
|
/* store result to destination */
|
||||||
out3 = (in3 > 0) ? in3 : (q31_t)__QSUB8(0, in3);
|
*(pDst + 3) = (q7_t) out4;
|
||||||
|
|
||||||
/* find absolute value */
|
/* update pointers to process next samples */
|
||||||
out4 = (in4 > 0) ? in4 : (q31_t)__QSUB8(0, in4);
|
pSrc += 4U;
|
||||||
|
pDst += 4U;
|
||||||
/* store result to destination */
|
|
||||||
*(pDst + 1) = (q7_t) out2;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* store result to destination */
|
}
|
||||||
*(pDst + 2) = (q7_t) out3;
|
|
||||||
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* store result to destination */
|
** No loop unrolling is used. */
|
||||||
*(pDst + 3) = (q7_t) out4;
|
blkCnt = blockSize % 0x4U;
|
||||||
|
#else
|
||||||
/* update pointers to process next samples */
|
|
||||||
pSrc += 4u;
|
/* Run the below code for Cortex-M0 */
|
||||||
pDst += 4u;
|
blkCnt = blockSize;
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
blkCnt--;
|
|
||||||
}
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
/* C = |A| */
|
||||||
** No loop unrolling is used. */
|
/* Read the input */
|
||||||
blkCnt = blockSize % 0x4u;
|
in = *pSrc++;
|
||||||
#else
|
|
||||||
|
/* Store the Absolute result in the destination buffer */
|
||||||
/* Run the below code for Cortex-M0 */
|
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? 0x7f : -in);
|
||||||
blkCnt = blockSize;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
blkCnt--;
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = |A| */
|
/**
|
||||||
/* Read the input */
|
* @} end of BasicAbs group
|
||||||
in = *pSrc++;
|
*/
|
||||||
|
|
||||||
/* Store the Absolute result in the destination buffer */
|
|
||||||
*pDst++ = (in > 0) ? in : ((in == (q7_t) 0x80) ? 0x7f : -in);
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicAbs group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,150 +1,138 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_add_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point vector addition
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_add_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point vector addition.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup BasicAdd Vector Addition
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Element-by-element addition of two vectors.
|
||||||
* ---------------------------------------------------------------------------- */
|
*
|
||||||
|
* <pre>
|
||||||
#include "arm_math.h"
|
* pDst[n] = pSrcA[n] + pSrcB[n], 0 <= n < blockSize.
|
||||||
|
* </pre>
|
||||||
/**
|
*
|
||||||
* @ingroup groupMath
|
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @defgroup BasicAdd Vector Addition
|
* @addtogroup BasicAdd
|
||||||
*
|
* @{
|
||||||
* Element-by-element addition of two vectors.
|
*/
|
||||||
*
|
|
||||||
* <pre>
|
/**
|
||||||
* pDst[n] = pSrcA[n] + pSrcB[n], 0 <= n < blockSize.
|
* @brief Floating-point vector addition.
|
||||||
* </pre>
|
* @param[in] *pSrcA points to the first input vector
|
||||||
*
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*/
|
||||||
* @addtogroup BasicAdd
|
|
||||||
* @{
|
void arm_add_f32(
|
||||||
*/
|
float32_t * pSrcA,
|
||||||
|
float32_t * pSrcB,
|
||||||
/**
|
float32_t * pDst,
|
||||||
* @brief Floating-point vector addition.
|
uint32_t blockSize)
|
||||||
* @param[in] *pSrcA points to the first input vector
|
{
|
||||||
* @param[in] *pSrcB points to the second input vector
|
uint32_t blkCnt; /* loop counter */
|
||||||
* @param[out] *pDst points to the output vector
|
|
||||||
* @param[in] blockSize number of samples in each vector
|
#if defined (ARM_MATH_DSP)
|
||||||
* @return none.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
float32_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
||||||
void arm_add_f32(
|
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
||||||
float32_t * pSrcA,
|
|
||||||
float32_t * pSrcB,
|
/*loop Unrolling */
|
||||||
float32_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blkCnt; /* loop counter */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* C = A + B */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Add and then store the results in the destination buffer. */
|
||||||
float32_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
|
||||||
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
/* read four inputs from sourceA and four inputs from sourceB */
|
||||||
|
inA1 = *pSrcA;
|
||||||
/*loop Unrolling */
|
inB1 = *pSrcB;
|
||||||
blkCnt = blockSize >> 2u;
|
inA2 = *(pSrcA + 1);
|
||||||
|
inB2 = *(pSrcB + 1);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
inA3 = *(pSrcA + 2);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inB3 = *(pSrcB + 2);
|
||||||
while(blkCnt > 0u)
|
inA4 = *(pSrcA + 3);
|
||||||
{
|
inB4 = *(pSrcB + 3);
|
||||||
/* C = A + B */
|
|
||||||
/* Add and then store the results in the destination buffer. */
|
/* C = A + B */
|
||||||
|
/* add and store result to destination */
|
||||||
/* read four inputs from sourceA and four inputs from sourceB */
|
*pDst = inA1 + inB1;
|
||||||
inA1 = *pSrcA;
|
*(pDst + 1) = inA2 + inB2;
|
||||||
inB1 = *pSrcB;
|
*(pDst + 2) = inA3 + inB3;
|
||||||
inA2 = *(pSrcA + 1);
|
*(pDst + 3) = inA4 + inB4;
|
||||||
inB2 = *(pSrcB + 1);
|
|
||||||
inA3 = *(pSrcA + 2);
|
/* update pointers to process next samples */
|
||||||
inB3 = *(pSrcB + 2);
|
pSrcA += 4U;
|
||||||
inA4 = *(pSrcA + 3);
|
pSrcB += 4U;
|
||||||
inB4 = *(pSrcB + 3);
|
pDst += 4U;
|
||||||
|
|
||||||
/* C = A + B */
|
|
||||||
/* add and store result to destination */
|
/* Decrement the loop counter */
|
||||||
*pDst = inA1 + inB1;
|
blkCnt--;
|
||||||
*(pDst + 1) = inA2 + inB2;
|
}
|
||||||
*(pDst + 2) = inA3 + inB3;
|
|
||||||
*(pDst + 3) = inA4 + inB4;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
/* update pointers to process next samples */
|
blkCnt = blockSize % 0x4U;
|
||||||
pSrcA += 4u;
|
|
||||||
pSrcB += 4u;
|
#else
|
||||||
pDst += 4u;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = blockSize % 0x4u;
|
{
|
||||||
|
/* C = A + B */
|
||||||
#else
|
/* Add and then store the results in the destination buffer. */
|
||||||
|
*pDst++ = (*pSrcA++) + (*pSrcB++);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Initialize blkCnt with number of samples */
|
blkCnt--;
|
||||||
blkCnt = blockSize;
|
}
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
/**
|
||||||
while(blkCnt > 0u)
|
* @} end of BasicAdd group
|
||||||
{
|
*/
|
||||||
/* C = A + B */
|
|
||||||
/* Add and then store the results in the destination buffer. */
|
|
||||||
*pDst++ = (*pSrcA++) + (*pSrcB++);
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicAdd group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,140 +1,128 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_add_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 vector addition
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_add_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 vector addition
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicAdd
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q15 vector addition.
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup BasicAdd
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q15 vector addition.
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_add_q15(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q15_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q15_t * pSrcB,
|
||||||
* @param[in] blockSize number of samples in each vector
|
q15_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q31_t inA1, inA2, inB1, inB2;
|
||||||
void arm_add_q15(
|
|
||||||
q15_t * pSrcA,
|
/*loop Unrolling */
|
||||||
q15_t * pSrcB,
|
blkCnt = blockSize >> 2U;
|
||||||
q15_t * pDst,
|
|
||||||
uint32_t blockSize)
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
{
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
uint32_t blkCnt; /* loop counter */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C = A + B */
|
||||||
|
/* Add and then store the results in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
inA1 = *__SIMD32(pSrcA)++;
|
||||||
q31_t inA1, inA2, inB1, inB2;
|
inA2 = *__SIMD32(pSrcA)++;
|
||||||
|
inB1 = *__SIMD32(pSrcB)++;
|
||||||
/*loop Unrolling */
|
inB2 = *__SIMD32(pSrcB)++;
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
*__SIMD32(pDst)++ = __QADD16(inA1, inB1);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*__SIMD32(pDst)++ = __QADD16(inA2, inB2);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
blkCnt--;
|
||||||
/* C = A + B */
|
}
|
||||||
/* Add and then store the results in the destination buffer. */
|
|
||||||
inA1 = *__SIMD32(pSrcA)++;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
inA2 = *__SIMD32(pSrcA)++;
|
** No loop unrolling is used. */
|
||||||
inB1 = *__SIMD32(pSrcB)++;
|
blkCnt = blockSize % 0x4U;
|
||||||
inB2 = *__SIMD32(pSrcB)++;
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
*__SIMD32(pDst)++ = __QADD16(inA1, inB1);
|
{
|
||||||
*__SIMD32(pDst)++ = __QADD16(inA2, inB2);
|
/* C = A + B */
|
||||||
|
/* Add and then store the results in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A + B */
|
|
||||||
/* Add and then store the results in the destination buffer. */
|
|
||||||
*pDst++ = (q15_t) __QADD16(*pSrcA++, *pSrcB++);
|
/* Initialize blkCnt with number of samples */
|
||||||
|
blkCnt = blockSize;
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
while (blkCnt > 0U)
|
||||||
}
|
{
|
||||||
|
/* C = A + B */
|
||||||
#else
|
/* Add and then store the results in the destination buffer. */
|
||||||
|
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ + *pSrcB++), 16);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
}
|
||||||
/* C = A + B */
|
|
||||||
/* Add and then store the results in the destination buffer. */
|
/**
|
||||||
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ + *pSrcB++), 16);
|
* @} end of BasicAdd group
|
||||||
|
*/
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicAdd group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,148 +1,136 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_add_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 vector addition
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_add_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 vector addition.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicAdd
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Q31 vector addition.
|
||||||
/**
|
* @param[in] *pSrcA points to the first input vector
|
||||||
* @ingroup groupMath
|
* @param[in] *pSrcB points to the second input vector
|
||||||
*/
|
* @param[out] *pDst points to the output vector
|
||||||
|
* @param[in] blockSize number of samples in each vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup BasicAdd
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q31 vector addition.
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_add_q31(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q31_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q31_t * pSrcB,
|
||||||
* @param[in] blockSize number of samples in each vector
|
q31_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q31_t inA1, inA2, inA3, inA4;
|
||||||
void arm_add_q31(
|
q31_t inB1, inB2, inB3, inB4;
|
||||||
q31_t * pSrcA,
|
|
||||||
q31_t * pSrcB,
|
/*loop Unrolling */
|
||||||
q31_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blkCnt; /* loop counter */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* C = A + B */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Add and then store the results in the destination buffer. */
|
||||||
q31_t inA1, inA2, inA3, inA4;
|
inA1 = *pSrcA++;
|
||||||
q31_t inB1, inB2, inB3, inB4;
|
inA2 = *pSrcA++;
|
||||||
|
inB1 = *pSrcB++;
|
||||||
/*loop Unrolling */
|
inB2 = *pSrcB++;
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
inA3 = *pSrcA++;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
inA4 = *pSrcA++;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inB3 = *pSrcB++;
|
||||||
while(blkCnt > 0u)
|
inB4 = *pSrcB++;
|
||||||
{
|
|
||||||
/* C = A + B */
|
*pDst++ = __QADD(inA1, inB1);
|
||||||
/* Add and then store the results in the destination buffer. */
|
*pDst++ = __QADD(inA2, inB2);
|
||||||
inA1 = *pSrcA++;
|
*pDst++ = __QADD(inA3, inB3);
|
||||||
inA2 = *pSrcA++;
|
*pDst++ = __QADD(inA4, inB4);
|
||||||
inB1 = *pSrcB++;
|
|
||||||
inB2 = *pSrcB++;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
inA3 = *pSrcA++;
|
}
|
||||||
inA4 = *pSrcA++;
|
|
||||||
inB3 = *pSrcB++;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
inB4 = *pSrcB++;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
*pDst++ = __QADD(inA1, inB1);
|
|
||||||
*pDst++ = __QADD(inA2, inB2);
|
while (blkCnt > 0U)
|
||||||
*pDst++ = __QADD(inA3, inB3);
|
{
|
||||||
*pDst++ = __QADD(inA4, inB4);
|
/* C = A + B */
|
||||||
|
/* Add and then store the results in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A + B */
|
|
||||||
/* Add and then store the results in the destination buffer. */
|
|
||||||
*pDst++ = __QADD(*pSrcA++, *pSrcB++);
|
/* Initialize blkCnt with number of samples */
|
||||||
|
blkCnt = blockSize;
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
while (blkCnt > 0U)
|
||||||
}
|
{
|
||||||
|
/* C = A + B */
|
||||||
#else
|
/* Add and then store the results in the destination buffer. */
|
||||||
|
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrcA++ + *pSrcB++);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A + B */
|
/**
|
||||||
/* Add and then store the results in the destination buffer. */
|
* @} end of BasicAdd group
|
||||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrcA++ + *pSrcB++);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicAdd group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,134 +1,122 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_add_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q7 vector addition
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_add_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q7 vector addition.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicAdd
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q7 vector addition.
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup BasicAdd
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q7 vector addition.
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_add_q7(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q7_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q7_t * pSrcB,
|
||||||
* @param[in] blockSize number of samples in each vector
|
q7_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
|
||||||
void arm_add_q7(
|
|
||||||
q7_t * pSrcA,
|
/*loop Unrolling */
|
||||||
q7_t * pSrcB,
|
blkCnt = blockSize >> 2U;
|
||||||
q7_t * pDst,
|
|
||||||
uint32_t blockSize)
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
{
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
uint32_t blkCnt; /* loop counter */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C = A + B */
|
||||||
|
/* Add and then store the results in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
|
||||||
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/*loop Unrolling */
|
blkCnt--;
|
||||||
blkCnt = blockSize >> 2u;
|
}
|
||||||
|
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
** No loop unrolling is used. */
|
||||||
while(blkCnt > 0u)
|
blkCnt = blockSize % 0x4U;
|
||||||
{
|
|
||||||
/* C = A + B */
|
while (blkCnt > 0U)
|
||||||
/* Add and then store the results in the destination buffer. */
|
{
|
||||||
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
|
/* C = A + B */
|
||||||
|
/* Add and then store the results in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = (q7_t) __SSAT(*pSrcA++ + *pSrcB++, 8);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A + B */
|
|
||||||
/* Add and then store the results in the destination buffer. */
|
|
||||||
*pDst++ = (q7_t) __SSAT(*pSrcA++ + *pSrcB++, 8);
|
/* Initialize blkCnt with number of samples */
|
||||||
|
blkCnt = blockSize;
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
while (blkCnt > 0U)
|
||||||
}
|
{
|
||||||
|
/* C = A + B */
|
||||||
#else
|
/* Add and then store the results in the destination buffer. */
|
||||||
|
*pDst++ = (q7_t) __SSAT((q15_t) * pSrcA++ + *pSrcB++, 8);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
}
|
||||||
/* C = A + B */
|
|
||||||
/* Add and then store the results in the destination buffer. */
|
/**
|
||||||
*pDst++ = (q7_t) __SSAT((q15_t) * pSrcA++ + *pSrcB++, 8);
|
* @} end of BasicAdd group
|
||||||
|
*/
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicAdd group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,135 +1,123 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_dot_prod_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point dot product
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_dot_prod_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point dot product.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup dot_prod Vector Dot Product
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Computes the dot product of two vectors.
|
||||||
* ---------------------------------------------------------------------------- */
|
* The vectors are multiplied element-by-element and then summed.
|
||||||
|
*
|
||||||
#include "arm_math.h"
|
* <pre>
|
||||||
|
* sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
|
||||||
/**
|
* </pre>
|
||||||
* @ingroup groupMath
|
*
|
||||||
*/
|
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @defgroup dot_prod Vector Dot Product
|
/**
|
||||||
*
|
* @addtogroup dot_prod
|
||||||
* Computes the dot product of two vectors.
|
* @{
|
||||||
* The vectors are multiplied element-by-element and then summed.
|
*/
|
||||||
*
|
|
||||||
* <pre>
|
/**
|
||||||
* sum = pSrcA[0]*pSrcB[0] + pSrcA[1]*pSrcB[1] + ... + pSrcA[blockSize-1]*pSrcB[blockSize-1]
|
* @brief Dot product of floating-point vectors.
|
||||||
* </pre>
|
* @param[in] *pSrcA points to the first input vector
|
||||||
*
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
* @param[in] blockSize number of samples in each vector
|
||||||
*/
|
* @param[out] *result output result returned here
|
||||||
|
* @return none.
|
||||||
/**
|
*/
|
||||||
* @addtogroup dot_prod
|
|
||||||
* @{
|
|
||||||
*/
|
void arm_dot_prod_f32(
|
||||||
|
float32_t * pSrcA,
|
||||||
/**
|
float32_t * pSrcB,
|
||||||
* @brief Dot product of floating-point vectors.
|
uint32_t blockSize,
|
||||||
* @param[in] *pSrcA points to the first input vector
|
float32_t * result)
|
||||||
* @param[in] *pSrcB points to the second input vector
|
{
|
||||||
* @param[in] blockSize number of samples in each vector
|
float32_t sum = 0.0f; /* Temporary result storage */
|
||||||
* @param[out] *result output result returned here
|
uint32_t blkCnt; /* loop counter */
|
||||||
* @return none.
|
|
||||||
*/
|
|
||||||
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
void arm_dot_prod_f32(
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
float32_t * pSrcA,
|
/*loop Unrolling */
|
||||||
float32_t * pSrcB,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize,
|
|
||||||
float32_t * result)
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
{
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
float32_t sum = 0.0f; /* Temporary result storage */
|
while (blkCnt > 0U)
|
||||||
uint32_t blkCnt; /* loop counter */
|
{
|
||||||
|
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||||
|
/* Calculate dot product and then store the result in a temporary buffer */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
sum += (*pSrcA++) * (*pSrcB++);
|
||||||
|
sum += (*pSrcA++) * (*pSrcB++);
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
sum += (*pSrcA++) * (*pSrcB++);
|
||||||
/*loop Unrolling */
|
sum += (*pSrcA++) * (*pSrcB++);
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
blkCnt--;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
** No loop unrolling is used. */
|
||||||
/* Calculate dot product and then store the result in a temporary buffer */
|
blkCnt = blockSize % 0x4U;
|
||||||
sum += (*pSrcA++) * (*pSrcB++);
|
|
||||||
sum += (*pSrcA++) * (*pSrcB++);
|
#else
|
||||||
sum += (*pSrcA++) * (*pSrcB++);
|
|
||||||
sum += (*pSrcA++) * (*pSrcB++);
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#else
|
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||||
|
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||||
/* Run the below code for Cortex-M0 */
|
sum += (*pSrcA++) * (*pSrcB++);
|
||||||
|
|
||||||
/* Initialize blkCnt with number of samples */
|
/* Decrement the loop counter */
|
||||||
blkCnt = blockSize;
|
blkCnt--;
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
/* Store the result back in the destination buffer */
|
||||||
|
*result = sum;
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/**
|
||||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
* @} end of dot_prod group
|
||||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
*/
|
||||||
sum += (*pSrcA++) * (*pSrcB++);
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
/* Store the result back in the destination buffer */
|
|
||||||
*result = sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of dot_prod group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,140 +1,128 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_dot_prod_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 dot product
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_dot_prod_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 dot product.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup dot_prod
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Dot product of Q15 vectors.
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[in] blockSize number of samples in each vector
|
||||||
*/
|
* @param[out] *result output result returned here
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup dot_prod
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these
|
||||||
|
* results are added to a 64-bit accumulator in 34.30 format.
|
||||||
/**
|
* Nonsaturating additions are used and given that there are 33 guard bits in the accumulator
|
||||||
* @brief Dot product of Q15 vectors.
|
* there is no risk of overflow.
|
||||||
* @param[in] *pSrcA points to the first input vector
|
* The return result is in 34.30 format.
|
||||||
* @param[in] *pSrcB points to the second input vector
|
*/
|
||||||
* @param[in] blockSize number of samples in each vector
|
|
||||||
* @param[out] *result output result returned here
|
void arm_dot_prod_q15(
|
||||||
* @return none.
|
q15_t * pSrcA,
|
||||||
*
|
q15_t * pSrcB,
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blockSize,
|
||||||
* \par
|
q63_t * result)
|
||||||
* The intermediate multiplications are in 1.15 x 1.15 = 2.30 format and these
|
{
|
||||||
* results are added to a 64-bit accumulator in 34.30 format.
|
q63_t sum = 0; /* Temporary result storage */
|
||||||
* Nonsaturating additions are used and given that there are 33 guard bits in the accumulator
|
uint32_t blkCnt; /* loop counter */
|
||||||
* there is no risk of overflow.
|
|
||||||
* The return result is in 34.30 format.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
void arm_dot_prod_q15(
|
|
||||||
q15_t * pSrcA,
|
|
||||||
q15_t * pSrcB,
|
/*loop Unrolling */
|
||||||
uint32_t blockSize,
|
blkCnt = blockSize >> 2U;
|
||||||
q63_t * result)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
q63_t sum = 0; /* Temporary result storage */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
uint32_t blkCnt; /* loop counter */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||||
|
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
|
||||||
|
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
|
||||||
|
|
||||||
/*loop Unrolling */
|
/* Decrement the loop counter */
|
||||||
blkCnt = blockSize >> 2u;
|
blkCnt--;
|
||||||
|
}
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
while(blkCnt > 0u)
|
** No loop unrolling is used. */
|
||||||
{
|
blkCnt = blockSize % 0x4U;
|
||||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
|
||||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
while (blkCnt > 0U)
|
||||||
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
|
{
|
||||||
sum = __SMLALD(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++, sum);
|
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||||
|
/* Calculate dot product and then store the results in a temporary buffer. */
|
||||||
/* Decrement the loop counter */
|
sum = __SMLALD(*pSrcA++, *pSrcB++, sum);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
|
||||||
|
#else
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/* Run the below code for Cortex-M0 */
|
||||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
|
||||||
/* Calculate dot product and then store the results in a temporary buffer. */
|
/* Initialize blkCnt with number of samples */
|
||||||
sum = __SMLALD(*pSrcA++, *pSrcB++, sum);
|
blkCnt = blockSize;
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
while (blkCnt > 0U)
|
||||||
blkCnt--;
|
{
|
||||||
}
|
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||||
|
/* Calculate dot product and then store the results in a temporary buffer. */
|
||||||
|
sum += (q63_t) ((q31_t) * pSrcA++ * *pSrcB++);
|
||||||
#else
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Run the below code for Cortex-M0 */
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Store the result in the destination buffer in 34.30 format */
|
||||||
{
|
*result = sum;
|
||||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
|
||||||
/* Calculate dot product and then store the results in a temporary buffer. */
|
}
|
||||||
sum += (q63_t) ((q31_t) * pSrcA++ * *pSrcB++);
|
|
||||||
|
/**
|
||||||
/* Decrement the loop counter */
|
* @} end of dot_prod group
|
||||||
blkCnt--;
|
*/
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
/* Store the result in the destination buffer in 34.30 format */
|
|
||||||
*result = sum;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of dot_prod group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,143 +1,131 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_dot_prod_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 dot product
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_dot_prod_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 dot product.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup dot_prod
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Dot product of Q31 vectors.
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[in] blockSize number of samples in each vector
|
||||||
*/
|
* @param[out] *result output result returned here
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup dot_prod
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these
|
||||||
|
* are truncated to 2.48 format by discarding the lower 14 bits.
|
||||||
/**
|
* The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
|
||||||
* @brief Dot product of Q31 vectors.
|
* There are 15 guard bits in the accumulator and there is no risk of overflow as long as
|
||||||
* @param[in] *pSrcA points to the first input vector
|
* the length of the vectors is less than 2^16 elements.
|
||||||
* @param[in] *pSrcB points to the second input vector
|
* The return result is in 16.48 format.
|
||||||
* @param[in] blockSize number of samples in each vector
|
*/
|
||||||
* @param[out] *result output result returned here
|
|
||||||
* @return none.
|
void arm_dot_prod_q31(
|
||||||
*
|
q31_t * pSrcA,
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q31_t * pSrcB,
|
||||||
* \par
|
uint32_t blockSize,
|
||||||
* The intermediate multiplications are in 1.31 x 1.31 = 2.62 format and these
|
q63_t * result)
|
||||||
* are truncated to 2.48 format by discarding the lower 14 bits.
|
{
|
||||||
* The 2.48 result is then added without saturation to a 64-bit accumulator in 16.48 format.
|
q63_t sum = 0; /* Temporary result storage */
|
||||||
* There are 15 guard bits in the accumulator and there is no risk of overflow as long as
|
uint32_t blkCnt; /* loop counter */
|
||||||
* the length of the vectors is less than 2^16 elements.
|
|
||||||
* The return result is in 16.48 format.
|
|
||||||
*/
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
void arm_dot_prod_q31(
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
q31_t * pSrcA,
|
q31_t inA1, inA2, inA3, inA4;
|
||||||
q31_t * pSrcB,
|
q31_t inB1, inB2, inB3, inB4;
|
||||||
uint32_t blockSize,
|
|
||||||
q63_t * result)
|
/*loop Unrolling */
|
||||||
{
|
blkCnt = blockSize >> 2U;
|
||||||
q63_t sum = 0; /* Temporary result storage */
|
|
||||||
uint32_t blkCnt; /* loop counter */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||||
q31_t inA1, inA2, inA3, inA4;
|
inA1 = *pSrcA++;
|
||||||
q31_t inB1, inB2, inB3, inB4;
|
inA2 = *pSrcA++;
|
||||||
|
inA3 = *pSrcA++;
|
||||||
/*loop Unrolling */
|
inA4 = *pSrcA++;
|
||||||
blkCnt = blockSize >> 2u;
|
inB1 = *pSrcB++;
|
||||||
|
inB2 = *pSrcB++;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
inB3 = *pSrcB++;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inB4 = *pSrcB++;
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
sum += ((q63_t) inA1 * inB1) >> 14U;
|
||||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
sum += ((q63_t) inA2 * inB2) >> 14U;
|
||||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
sum += ((q63_t) inA3 * inB3) >> 14U;
|
||||||
inA1 = *pSrcA++;
|
sum += ((q63_t) inA4 * inB4) >> 14U;
|
||||||
inA2 = *pSrcA++;
|
|
||||||
inA3 = *pSrcA++;
|
/* Decrement the loop counter */
|
||||||
inA4 = *pSrcA++;
|
blkCnt--;
|
||||||
inB1 = *pSrcB++;
|
}
|
||||||
inB2 = *pSrcB++;
|
|
||||||
inB3 = *pSrcB++;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
inB4 = *pSrcB++;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
sum += ((q63_t) inA1 * inB1) >> 14u;
|
|
||||||
sum += ((q63_t) inA2 * inB2) >> 14u;
|
#else
|
||||||
sum += ((q63_t) inA3 * inB3) >> 14u;
|
|
||||||
sum += ((q63_t) inA4 * inB4) >> 14u;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#else
|
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||||
|
/* Calculate dot product and then store the result in a temporary buffer. */
|
||||||
/* Run the below code for Cortex-M0 */
|
sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14U;
|
||||||
|
|
||||||
/* Initialize blkCnt with number of samples */
|
/* Decrement the loop counter */
|
||||||
blkCnt = blockSize;
|
blkCnt--;
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
/* Store the result in the destination buffer in 16.48 format */
|
||||||
|
*result = sum;
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
/**
|
||||||
/* Calculate dot product and then store the result in a temporary buffer. */
|
* @} end of dot_prod group
|
||||||
sum += ((q63_t) * pSrcA++ * *pSrcB++) >> 14u;
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Store the result in the destination buffer in 16.48 format */
|
|
||||||
*result = sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of dot_prod group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,159 +1,147 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_dot_prod_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q7 dot product
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_dot_prod_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q7 dot product.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup dot_prod
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Dot product of Q7 vectors.
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[in] blockSize number of samples in each vector
|
||||||
*/
|
* @param[out] *result output result returned here
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup dot_prod
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
|
||||||
|
* results are added to an accumulator in 18.14 format.
|
||||||
/**
|
* Nonsaturating additions are used and there is no danger of wrap around as long as
|
||||||
* @brief Dot product of Q7 vectors.
|
* the vectors are less than 2^18 elements long.
|
||||||
* @param[in] *pSrcA points to the first input vector
|
* The return result is in 18.14 format.
|
||||||
* @param[in] *pSrcB points to the second input vector
|
*/
|
||||||
* @param[in] blockSize number of samples in each vector
|
|
||||||
* @param[out] *result output result returned here
|
void arm_dot_prod_q7(
|
||||||
* @return none.
|
q7_t * pSrcA,
|
||||||
*
|
q7_t * pSrcB,
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blockSize,
|
||||||
* \par
|
q31_t * result)
|
||||||
* The intermediate multiplications are in 1.7 x 1.7 = 2.14 format and these
|
{
|
||||||
* results are added to an accumulator in 18.14 format.
|
uint32_t blkCnt; /* loop counter */
|
||||||
* Nonsaturating additions are used and there is no danger of wrap around as long as
|
|
||||||
* the vectors are less than 2^18 elements long.
|
q31_t sum = 0; /* Temporary variables to store output */
|
||||||
* The return result is in 18.14 format.
|
|
||||||
*/
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
void arm_dot_prod_q7(
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
q7_t * pSrcA,
|
|
||||||
q7_t * pSrcB,
|
q31_t input1, input2; /* Temporary variables to store input */
|
||||||
uint32_t blockSize,
|
q31_t inA1, inA2, inB1, inB2; /* Temporary variables to store input */
|
||||||
q31_t * result)
|
|
||||||
{
|
|
||||||
uint32_t blkCnt; /* loop counter */
|
|
||||||
|
/*loop Unrolling */
|
||||||
q31_t sum = 0; /* Temporary variables to store output */
|
blkCnt = blockSize >> 2U;
|
||||||
|
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
q31_t input1, input2; /* Temporary variables to store input */
|
/* read 4 samples at a time from sourceA */
|
||||||
q31_t inA1, inA2, inB1, inB2; /* Temporary variables to store input */
|
input1 = *__SIMD32(pSrcA)++;
|
||||||
|
/* read 4 samples at a time from sourceB */
|
||||||
|
input2 = *__SIMD32(pSrcB)++;
|
||||||
|
|
||||||
/*loop Unrolling */
|
/* extract two q7_t samples to q15_t samples */
|
||||||
blkCnt = blockSize >> 2u;
|
inA1 = __SXTB16(__ROR(input1, 8));
|
||||||
|
/* extract remaining two samples */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
inA2 = __SXTB16(input1);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* extract two q7_t samples to q15_t samples */
|
||||||
while(blkCnt > 0u)
|
inB1 = __SXTB16(__ROR(input2, 8));
|
||||||
{
|
/* extract remaining two samples */
|
||||||
/* read 4 samples at a time from sourceA */
|
inB2 = __SXTB16(input2);
|
||||||
input1 = *__SIMD32(pSrcA)++;
|
|
||||||
/* read 4 samples at a time from sourceB */
|
/* multiply and accumulate two samples at a time */
|
||||||
input2 = *__SIMD32(pSrcB)++;
|
sum = __SMLAD(inA1, inB1, sum);
|
||||||
|
sum = __SMLAD(inA2, inB2, sum);
|
||||||
/* extract two q7_t samples to q15_t samples */
|
|
||||||
inA1 = __SXTB16(__ROR(input1, 8));
|
/* Decrement the loop counter */
|
||||||
/* extract remaining two samples */
|
blkCnt--;
|
||||||
inA2 = __SXTB16(input1);
|
}
|
||||||
/* extract two q7_t samples to q15_t samples */
|
|
||||||
inB1 = __SXTB16(__ROR(input2, 8));
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* extract remaining two samples */
|
** No loop unrolling is used. */
|
||||||
inB2 = __SXTB16(input2);
|
blkCnt = blockSize % 0x4U;
|
||||||
|
|
||||||
/* multiply and accumulate two samples at a time */
|
while (blkCnt > 0U)
|
||||||
sum = __SMLAD(inA1, inB1, sum);
|
{
|
||||||
sum = __SMLAD(inA2, inB2, sum);
|
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||||
|
/* Dot product and then store the results in a temporary buffer. */
|
||||||
/* Decrement the loop counter */
|
sum = __SMLAD(*pSrcA++, *pSrcB++, sum);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
|
||||||
/* Dot product and then store the results in a temporary buffer. */
|
|
||||||
sum = __SMLAD(*pSrcA++, *pSrcB++, sum);
|
/* Initialize blkCnt with number of samples */
|
||||||
|
blkCnt = blockSize;
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
while (blkCnt > 0U)
|
||||||
}
|
{
|
||||||
|
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
||||||
#else
|
/* Dot product and then store the results in a temporary buffer. */
|
||||||
|
sum += (q31_t) ((q15_t) * pSrcA++ * *pSrcB++);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/* Store the result in the destination buffer in 18.14 format */
|
||||||
/* C = A[0]* B[0] + A[1]* B[1] + A[2]* B[2] + .....+ A[blockSize-1]* B[blockSize-1] */
|
*result = sum;
|
||||||
/* Dot product and then store the results in a temporary buffer. */
|
}
|
||||||
sum += (q31_t) ((q15_t) * pSrcA++ * *pSrcB++);
|
|
||||||
|
/**
|
||||||
/* Decrement the loop counter */
|
* @} end of dot_prod group
|
||||||
blkCnt--;
|
*/
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
|
|
||||||
/* Store the result in the destination buffer in 18.14 format */
|
|
||||||
*result = sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of dot_prod group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,174 +1,162 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_mult_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point vector multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_mult_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point vector multiplication.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup BasicMult Vector Multiplication
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Element-by-element multiplication of two vectors.
|
||||||
* -------------------------------------------------------------------- */
|
*
|
||||||
|
* <pre>
|
||||||
#include "arm_math.h"
|
* pDst[n] = pSrcA[n] * pSrcB[n], 0 <= n < blockSize.
|
||||||
|
* </pre>
|
||||||
/**
|
*
|
||||||
* @ingroup groupMath
|
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @defgroup BasicMult Vector Multiplication
|
* @addtogroup BasicMult
|
||||||
*
|
* @{
|
||||||
* Element-by-element multiplication of two vectors.
|
*/
|
||||||
*
|
|
||||||
* <pre>
|
/**
|
||||||
* pDst[n] = pSrcA[n] * pSrcB[n], 0 <= n < blockSize.
|
* @brief Floating-point vector multiplication.
|
||||||
* </pre>
|
* @param[in] *pSrcA points to the first input vector
|
||||||
*
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*/
|
||||||
* @addtogroup BasicMult
|
|
||||||
* @{
|
void arm_mult_f32(
|
||||||
*/
|
float32_t * pSrcA,
|
||||||
|
float32_t * pSrcB,
|
||||||
/**
|
float32_t * pDst,
|
||||||
* @brief Floating-point vector multiplication.
|
uint32_t blockSize)
|
||||||
* @param[in] *pSrcA points to the first input vector
|
{
|
||||||
* @param[in] *pSrcB points to the second input vector
|
uint32_t blkCnt; /* loop counters */
|
||||||
* @param[out] *pDst points to the output vector
|
#if defined (ARM_MATH_DSP)
|
||||||
* @param[in] blockSize number of samples in each vector
|
|
||||||
* @return none.
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
*/
|
float32_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
||||||
|
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
||||||
void arm_mult_f32(
|
float32_t out1, out2, out3, out4; /* temporary output variables */
|
||||||
float32_t * pSrcA,
|
|
||||||
float32_t * pSrcB,
|
/* loop Unrolling */
|
||||||
float32_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blkCnt; /* loop counters */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* C = A * B */
|
||||||
float32_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
/* Multiply the inputs and store the results in output buffer */
|
||||||
float32_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
/* read sample from sourceA */
|
||||||
float32_t out1, out2, out3, out4; /* temporary output variables */
|
inA1 = *pSrcA;
|
||||||
|
/* read sample from sourceB */
|
||||||
/* loop Unrolling */
|
inB1 = *pSrcB;
|
||||||
blkCnt = blockSize >> 2u;
|
/* read sample from sourceA */
|
||||||
|
inA2 = *(pSrcA + 1);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
/* read sample from sourceB */
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inB2 = *(pSrcB + 1);
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/* out = sourceA * sourceB */
|
||||||
/* C = A * B */
|
out1 = inA1 * inB1;
|
||||||
/* Multiply the inputs and store the results in output buffer */
|
|
||||||
/* read sample from sourceA */
|
/* read sample from sourceA */
|
||||||
inA1 = *pSrcA;
|
inA3 = *(pSrcA + 2);
|
||||||
/* read sample from sourceB */
|
/* read sample from sourceB */
|
||||||
inB1 = *pSrcB;
|
inB3 = *(pSrcB + 2);
|
||||||
/* read sample from sourceA */
|
|
||||||
inA2 = *(pSrcA + 1);
|
/* out = sourceA * sourceB */
|
||||||
/* read sample from sourceB */
|
out2 = inA2 * inB2;
|
||||||
inB2 = *(pSrcB + 1);
|
|
||||||
|
/* read sample from sourceA */
|
||||||
/* out = sourceA * sourceB */
|
inA4 = *(pSrcA + 3);
|
||||||
out1 = inA1 * inB1;
|
|
||||||
|
/* store result to destination buffer */
|
||||||
/* read sample from sourceA */
|
*pDst = out1;
|
||||||
inA3 = *(pSrcA + 2);
|
|
||||||
/* read sample from sourceB */
|
/* read sample from sourceB */
|
||||||
inB3 = *(pSrcB + 2);
|
inB4 = *(pSrcB + 3);
|
||||||
|
|
||||||
/* out = sourceA * sourceB */
|
/* out = sourceA * sourceB */
|
||||||
out2 = inA2 * inB2;
|
out3 = inA3 * inB3;
|
||||||
|
|
||||||
/* read sample from sourceA */
|
/* store result to destination buffer */
|
||||||
inA4 = *(pSrcA + 3);
|
*(pDst + 1) = out2;
|
||||||
|
|
||||||
/* store result to destination buffer */
|
/* out = sourceA * sourceB */
|
||||||
*pDst = out1;
|
out4 = inA4 * inB4;
|
||||||
|
/* store result to destination buffer */
|
||||||
/* read sample from sourceB */
|
*(pDst + 2) = out3;
|
||||||
inB4 = *(pSrcB + 3);
|
/* store result to destination buffer */
|
||||||
|
*(pDst + 3) = out4;
|
||||||
/* out = sourceA * sourceB */
|
|
||||||
out3 = inA3 * inB3;
|
|
||||||
|
/* update pointers to process next samples */
|
||||||
/* store result to destination buffer */
|
pSrcA += 4U;
|
||||||
*(pDst + 1) = out2;
|
pSrcB += 4U;
|
||||||
|
pDst += 4U;
|
||||||
/* out = sourceA * sourceB */
|
|
||||||
out4 = inA4 * inB4;
|
/* Decrement the blockSize loop counter */
|
||||||
/* store result to destination buffer */
|
blkCnt--;
|
||||||
*(pDst + 2) = out3;
|
}
|
||||||
/* store result to destination buffer */
|
|
||||||
*(pDst + 3) = out4;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
/* update pointers to process next samples */
|
|
||||||
pSrcA += 4u;
|
#else
|
||||||
pSrcB += 4u;
|
|
||||||
pDst += 4u;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = blockSize % 0x4u;
|
{
|
||||||
|
/* C = A * B */
|
||||||
#else
|
/* Multiply the inputs and store the results in output buffer */
|
||||||
|
*pDst++ = (*pSrcA++) * (*pSrcB++);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the blockSize loop counter */
|
||||||
/* Initialize blkCnt with number of samples */
|
blkCnt--;
|
||||||
blkCnt = blockSize;
|
}
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
/**
|
||||||
while(blkCnt > 0u)
|
* @} end of BasicMult group
|
||||||
{
|
*/
|
||||||
/* C = A * B */
|
|
||||||
/* Multiply the inputs and store the results in output buffer */
|
|
||||||
*pDst++ = (*pSrcA++) * (*pSrcB++);
|
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,154 +1,142 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_mult_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 vector multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_mult_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 vector multiplication.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicMult
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Q15 vector multiplication
|
||||||
/**
|
* @param[in] *pSrcA points to the first input vector
|
||||||
* @ingroup groupMath
|
* @param[in] *pSrcB points to the second input vector
|
||||||
*/
|
* @param[out] *pDst points to the output vector
|
||||||
|
* @param[in] blockSize number of samples in each vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup BasicMult
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q15 vector multiplication
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_mult_q15(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q15_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q15_t * pSrcB,
|
||||||
* @param[in] blockSize number of samples in each vector
|
q15_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counters */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q31_t inA1, inA2, inB1, inB2; /* temporary input variables */
|
||||||
void arm_mult_q15(
|
q15_t out1, out2, out3, out4; /* temporary output variables */
|
||||||
q15_t * pSrcA,
|
q31_t mul1, mul2, mul3, mul4; /* temporary variables */
|
||||||
q15_t * pSrcB,
|
|
||||||
q15_t * pDst,
|
/* loop Unrolling */
|
||||||
uint32_t blockSize)
|
blkCnt = blockSize >> 2U;
|
||||||
{
|
|
||||||
uint32_t blkCnt; /* loop counters */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* read two samples at a time from sourceA */
|
||||||
q31_t inA1, inA2, inB1, inB2; /* temporary input variables */
|
inA1 = *__SIMD32(pSrcA)++;
|
||||||
q15_t out1, out2, out3, out4; /* temporary output variables */
|
/* read two samples at a time from sourceB */
|
||||||
q31_t mul1, mul2, mul3, mul4; /* temporary variables */
|
inB1 = *__SIMD32(pSrcB)++;
|
||||||
|
/* read two samples at a time from sourceA */
|
||||||
/* loop Unrolling */
|
inA2 = *__SIMD32(pSrcA)++;
|
||||||
blkCnt = blockSize >> 2u;
|
/* read two samples at a time from sourceB */
|
||||||
|
inB2 = *__SIMD32(pSrcB)++;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* multiply mul = sourceA * sourceB */
|
||||||
while(blkCnt > 0u)
|
mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
||||||
{
|
mul2 = (q31_t) ((q15_t) inA1 * (q15_t) inB1);
|
||||||
/* read two samples at a time from sourceA */
|
mul3 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB2 >> 16));
|
||||||
inA1 = *__SIMD32(pSrcA)++;
|
mul4 = (q31_t) ((q15_t) inA2 * (q15_t) inB2);
|
||||||
/* read two samples at a time from sourceB */
|
|
||||||
inB1 = *__SIMD32(pSrcB)++;
|
/* saturate result to 16 bit */
|
||||||
/* read two samples at a time from sourceA */
|
out1 = (q15_t) __SSAT(mul1 >> 15, 16);
|
||||||
inA2 = *__SIMD32(pSrcA)++;
|
out2 = (q15_t) __SSAT(mul2 >> 15, 16);
|
||||||
/* read two samples at a time from sourceB */
|
out3 = (q15_t) __SSAT(mul3 >> 15, 16);
|
||||||
inB2 = *__SIMD32(pSrcB)++;
|
out4 = (q15_t) __SSAT(mul4 >> 15, 16);
|
||||||
|
|
||||||
/* multiply mul = sourceA * sourceB */
|
/* store the result */
|
||||||
mul1 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
mul2 = (q31_t) ((q15_t) inA1 * (q15_t) inB1);
|
|
||||||
mul3 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB2 >> 16));
|
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
|
||||||
mul4 = (q31_t) ((q15_t) inA2 * (q15_t) inB2);
|
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
|
||||||
|
|
||||||
/* saturate result to 16 bit */
|
#else
|
||||||
out1 = (q15_t) __SSAT(mul1 >> 15, 16);
|
|
||||||
out2 = (q15_t) __SSAT(mul2 >> 15, 16);
|
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
|
||||||
out3 = (q15_t) __SSAT(mul3 >> 15, 16);
|
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
|
||||||
out4 = (q15_t) __SSAT(mul4 >> 15, 16);
|
|
||||||
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
/* store the result */
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
/* Decrement the blockSize loop counter */
|
||||||
|
blkCnt--;
|
||||||
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
|
}
|
||||||
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
|
|
||||||
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
#else
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
*__SIMD32(pDst)++ = __PKHBT(out2, out1, 16);
|
|
||||||
*__SIMD32(pDst)++ = __PKHBT(out4, out3, 16);
|
#else
|
||||||
|
|
||||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#else
|
/* C = A * B */
|
||||||
|
/* Multiply the inputs and store the result in the destination buffer */
|
||||||
/* Run the below code for Cortex-M0 */
|
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
|
||||||
|
|
||||||
/* Initialize blkCnt with number of samples */
|
/* Decrement the blockSize loop counter */
|
||||||
blkCnt = blockSize;
|
blkCnt--;
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
while(blkCnt > 0u)
|
* @} end of BasicMult group
|
||||||
{
|
*/
|
||||||
/* C = A * B */
|
|
||||||
/* Multiply the inputs and store the result in the destination buffer */
|
|
||||||
*pDst++ = (q15_t) __SSAT((((q31_t) (*pSrcA++) * (*pSrcB++)) >> 15), 16);
|
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,160 +1,148 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_mult_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 vector multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_mult_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 vector multiplication.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicMult
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q31 vector multiplication.
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup BasicMult
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q31 vector multiplication.
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_mult_q31(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q31_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q31_t * pSrcB,
|
||||||
* @param[in] blockSize number of samples in each vector
|
q31_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counters */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q31_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
||||||
void arm_mult_q31(
|
q31_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
||||||
q31_t * pSrcA,
|
q31_t out1, out2, out3, out4; /* temporary output variables */
|
||||||
q31_t * pSrcB,
|
|
||||||
q31_t * pDst,
|
/* loop Unrolling */
|
||||||
uint32_t blockSize)
|
blkCnt = blockSize >> 2U;
|
||||||
{
|
|
||||||
uint32_t blkCnt; /* loop counters */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* C = A * B */
|
||||||
q31_t inA1, inA2, inA3, inA4; /* temporary input variables */
|
/* Multiply the inputs and then store the results in the destination buffer. */
|
||||||
q31_t inB1, inB2, inB3, inB4; /* temporary input variables */
|
inA1 = *pSrcA++;
|
||||||
q31_t out1, out2, out3, out4; /* temporary output variables */
|
inA2 = *pSrcA++;
|
||||||
|
inA3 = *pSrcA++;
|
||||||
/* loop Unrolling */
|
inA4 = *pSrcA++;
|
||||||
blkCnt = blockSize >> 2u;
|
inB1 = *pSrcB++;
|
||||||
|
inB2 = *pSrcB++;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
inB3 = *pSrcB++;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inB4 = *pSrcB++;
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||||
/* C = A * B */
|
out2 = ((q63_t) inA2 * inB2) >> 32;
|
||||||
/* Multiply the inputs and then store the results in the destination buffer. */
|
out3 = ((q63_t) inA3 * inB3) >> 32;
|
||||||
inA1 = *pSrcA++;
|
out4 = ((q63_t) inA4 * inB4) >> 32;
|
||||||
inA2 = *pSrcA++;
|
|
||||||
inA3 = *pSrcA++;
|
out1 = __SSAT(out1, 31);
|
||||||
inA4 = *pSrcA++;
|
out2 = __SSAT(out2, 31);
|
||||||
inB1 = *pSrcB++;
|
out3 = __SSAT(out3, 31);
|
||||||
inB2 = *pSrcB++;
|
out4 = __SSAT(out4, 31);
|
||||||
inB3 = *pSrcB++;
|
|
||||||
inB4 = *pSrcB++;
|
*pDst++ = out1 << 1U;
|
||||||
|
*pDst++ = out2 << 1U;
|
||||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
*pDst++ = out3 << 1U;
|
||||||
out2 = ((q63_t) inA2 * inB2) >> 32;
|
*pDst++ = out4 << 1U;
|
||||||
out3 = ((q63_t) inA3 * inB3) >> 32;
|
|
||||||
out4 = ((q63_t) inA4 * inB4) >> 32;
|
/* Decrement the blockSize loop counter */
|
||||||
|
blkCnt--;
|
||||||
out1 = __SSAT(out1, 31);
|
}
|
||||||
out2 = __SSAT(out2, 31);
|
|
||||||
out3 = __SSAT(out3, 31);
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
out4 = __SSAT(out4, 31);
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
*pDst++ = out1 << 1u;
|
|
||||||
*pDst++ = out2 << 1u;
|
while (blkCnt > 0U)
|
||||||
*pDst++ = out3 << 1u;
|
{
|
||||||
*pDst++ = out4 << 1u;
|
/* C = A * B */
|
||||||
|
/* Multiply the inputs and then store the results in the destination buffer. */
|
||||||
/* Decrement the blockSize loop counter */
|
inA1 = *pSrcA++;
|
||||||
blkCnt--;
|
inB1 = *pSrcB++;
|
||||||
}
|
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||||
|
out1 = __SSAT(out1, 31);
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
*pDst++ = out1 << 1U;
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
/* Decrement the blockSize loop counter */
|
||||||
|
blkCnt--;
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A * B */
|
#else
|
||||||
/* Multiply the inputs and then store the results in the destination buffer. */
|
|
||||||
inA1 = *pSrcA++;
|
/* Run the below code for Cortex-M0 */
|
||||||
inB1 = *pSrcB++;
|
|
||||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
/* Initialize blkCnt with number of samples */
|
||||||
out1 = __SSAT(out1, 31);
|
blkCnt = blockSize;
|
||||||
*pDst++ = out1 << 1u;
|
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
while (blkCnt > 0U)
|
||||||
blkCnt--;
|
{
|
||||||
}
|
/* C = A * B */
|
||||||
|
/* Multiply the inputs and then store the results in the destination buffer. */
|
||||||
#else
|
*pDst++ =
|
||||||
|
(q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the blockSize loop counter */
|
||||||
/* Initialize blkCnt with number of samples */
|
blkCnt--;
|
||||||
blkCnt = blockSize;
|
}
|
||||||
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A * B */
|
/**
|
||||||
/* Multiply the inputs and then store the results in the destination buffer. */
|
* @} end of BasicMult group
|
||||||
*pDst++ =
|
*/
|
||||||
(q31_t) clip_q63_to_q31(((q63_t) (*pSrcA++) * (*pSrcB++)) >> 31);
|
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,127 +1,115 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_mult_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q7 vector multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_mult_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q7 vector multiplication.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicMult
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q7 vector multiplication
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup BasicMult
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q7 vector multiplication
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_mult_q7(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q7_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q7_t * pSrcB,
|
||||||
* @param[in] blockSize number of samples in each vector
|
q7_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counters */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q7_t out1, out2, out3, out4; /* Temporary variables to store the product */
|
||||||
void arm_mult_q7(
|
|
||||||
q7_t * pSrcA,
|
/* loop Unrolling */
|
||||||
q7_t * pSrcB,
|
blkCnt = blockSize >> 2U;
|
||||||
q7_t * pDst,
|
|
||||||
uint32_t blockSize)
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
{
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
uint32_t blkCnt; /* loop counters */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C = A * B */
|
||||||
|
/* Multiply the inputs and store the results in temporary variables */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
out1 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||||
q7_t out1, out2, out3, out4; /* Temporary variables to store the product */
|
out2 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||||
|
out3 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||||
/* loop Unrolling */
|
out4 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
/* Store the results of 4 inputs in the destination buffer in single cycle by packing */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
/* Decrement the blockSize loop counter */
|
||||||
{
|
blkCnt--;
|
||||||
/* C = A * B */
|
}
|
||||||
/* Multiply the inputs and store the results in temporary variables */
|
|
||||||
out1 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
out2 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
** No loop unrolling is used. */
|
||||||
out3 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
blkCnt = blockSize % 0x4U;
|
||||||
out4 = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
|
||||||
|
#else
|
||||||
/* Store the results of 4 inputs in the destination buffer in single cycle by packing */
|
|
||||||
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#else
|
/* C = A * B */
|
||||||
|
/* Multiply the inputs and store the result in the destination buffer */
|
||||||
/* Run the below code for Cortex-M0 */
|
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
||||||
|
|
||||||
/* Initialize blkCnt with number of samples */
|
/* Decrement the blockSize loop counter */
|
||||||
blkCnt = blockSize;
|
blkCnt--;
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
while(blkCnt > 0u)
|
* @} end of BasicMult group
|
||||||
{
|
*/
|
||||||
/* C = A * B */
|
|
||||||
/* Multiply the inputs and store the result in the destination buffer */
|
|
||||||
*pDst++ = (q7_t) __SSAT((((q15_t) (*pSrcA++) * (*pSrcB++)) >> 7), 8);
|
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,146 +1,134 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_negate_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Negates floating-point vectors
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_negate_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Negates floating-point vectors.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup negate Vector Negate
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Negates the elements of a vector.
|
||||||
* ---------------------------------------------------------------------------- */
|
*
|
||||||
|
* <pre>
|
||||||
#include "arm_math.h"
|
* pDst[n] = -pSrc[n], 0 <= n < blockSize.
|
||||||
|
* </pre>
|
||||||
/**
|
*
|
||||||
* @ingroup groupMath
|
* The functions support in-place computation allowing the source and
|
||||||
*/
|
* destination pointers to reference the same memory buffer.
|
||||||
|
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||||
/**
|
*/
|
||||||
* @defgroup negate Vector Negate
|
|
||||||
*
|
/**
|
||||||
* Negates the elements of a vector.
|
* @addtogroup negate
|
||||||
*
|
* @{
|
||||||
* <pre>
|
*/
|
||||||
* pDst[n] = -pSrc[n], 0 <= n < blockSize.
|
|
||||||
* </pre>
|
/**
|
||||||
*
|
* @brief Negates the elements of a floating-point vector.
|
||||||
* The functions support in-place computation allowing the source and
|
* @param[in] *pSrc points to the input vector
|
||||||
* destination pointers to reference the same memory buffer.
|
* @param[out] *pDst points to the output vector
|
||||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
* @param[in] blockSize number of samples in the vector
|
||||||
*/
|
* @return none.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @addtogroup negate
|
void arm_negate_f32(
|
||||||
* @{
|
float32_t * pSrc,
|
||||||
*/
|
float32_t * pDst,
|
||||||
|
uint32_t blockSize)
|
||||||
/**
|
{
|
||||||
* @brief Negates the elements of a floating-point vector.
|
uint32_t blkCnt; /* loop counter */
|
||||||
* @param[in] *pSrc points to the input vector
|
|
||||||
* @param[out] *pDst points to the output vector
|
|
||||||
* @param[in] blockSize number of samples in the vector
|
#if defined (ARM_MATH_DSP)
|
||||||
* @return none.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
float32_t in1, in2, in3, in4; /* temporary variables */
|
||||||
void arm_negate_f32(
|
|
||||||
float32_t * pSrc,
|
/*loop Unrolling */
|
||||||
float32_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blkCnt; /* loop counter */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* read inputs from source */
|
||||||
|
in1 = *pSrc;
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
in2 = *(pSrc + 1);
|
||||||
float32_t in1, in2, in3, in4; /* temporary variables */
|
in3 = *(pSrc + 2);
|
||||||
|
in4 = *(pSrc + 3);
|
||||||
/*loop Unrolling */
|
|
||||||
blkCnt = blockSize >> 2u;
|
/* negate the input */
|
||||||
|
in1 = -in1;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
in2 = -in2;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
in3 = -in3;
|
||||||
while(blkCnt > 0u)
|
in4 = -in4;
|
||||||
{
|
|
||||||
/* read inputs from source */
|
/* store the result to destination */
|
||||||
in1 = *pSrc;
|
*pDst = in1;
|
||||||
in2 = *(pSrc + 1);
|
*(pDst + 1) = in2;
|
||||||
in3 = *(pSrc + 2);
|
*(pDst + 2) = in3;
|
||||||
in4 = *(pSrc + 3);
|
*(pDst + 3) = in4;
|
||||||
|
|
||||||
/* negate the input */
|
/* update pointers to process next samples */
|
||||||
in1 = -in1;
|
pSrc += 4U;
|
||||||
in2 = -in2;
|
pDst += 4U;
|
||||||
in3 = -in3;
|
|
||||||
in4 = -in4;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* store the result to destination */
|
}
|
||||||
*pDst = in1;
|
|
||||||
*(pDst + 1) = in2;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
*(pDst + 2) = in3;
|
** No loop unrolling is used. */
|
||||||
*(pDst + 3) = in4;
|
blkCnt = blockSize % 0x4U;
|
||||||
|
|
||||||
/* update pointers to process next samples */
|
#else
|
||||||
pSrc += 4u;
|
|
||||||
pDst += 4u;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = blockSize % 0x4u;
|
{
|
||||||
|
/* C = -A */
|
||||||
#else
|
/* Negate and then store the results in the destination buffer. */
|
||||||
|
*pDst++ = -*pSrc++;
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Initialize blkCnt with number of samples */
|
blkCnt--;
|
||||||
blkCnt = blockSize;
|
}
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
/**
|
||||||
while(blkCnt > 0u)
|
* @} end of negate group
|
||||||
{
|
*/
|
||||||
/* C = -A */
|
|
||||||
/* Negate and then store the results in the destination buffer. */
|
|
||||||
*pDst++ = -*pSrc++;
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of negate group
|
|
||||||
*/
|
|
||||||
|
|
|
@@ -1,142 +1,131 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_negate_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Negates Q15 vectors
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_negate_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Negates Q15 vectors.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup negate
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Negates the elements of a Q15 vector.
|
||||||
/**
|
* @param[in] *pSrc points to the input vector
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in the vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup negate
|
* \par Conditions for optimum performance
|
||||||
* @{
|
* Input and output buffers should be aligned to 32-bit boundaries
|
||||||
*/
|
*
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @brief Negates the elements of a Q15 vector.
|
* \par
|
||||||
* @param[in] *pSrc points to the input vector
|
* The function uses saturating arithmetic.
|
||||||
* @param[out] *pDst points to the output vector
|
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||||
* @param[in] blockSize number of samples in the vector
|
*/
|
||||||
* @return none.
|
|
||||||
*
|
void arm_negate_q15(
|
||||||
* \par Conditions for optimum performance
|
q15_t * pSrc,
|
||||||
* Input and output buffers should be aligned to 32-bit boundaries
|
q15_t * pDst,
|
||||||
*
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
q15_t in;
|
||||||
* The function uses saturating arithmetic.
|
|
||||||
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
/* Run the below code when ARM_MATH_DSP is defined (DSP-extension cores such as Cortex-M4) */
|
||||||
void arm_negate_q15(
|
|
||||||
q15_t * pSrc,
|
q31_t in1, in2; /* Temporary variables */
|
||||||
q15_t * pDst,
|
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/*loop Unrolling */
|
||||||
uint32_t blkCnt; /* loop counter */
|
blkCnt = blockSize >> 2U;
|
||||||
q15_t in;
|
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
{
|
||||||
|
/* C = -A */
|
||||||
q31_t in1, in2; /* Temporary variables */
|
/* Read two inputs at a time */
|
||||||
|
in1 = _SIMD32_OFFSET(pSrc);
|
||||||
|
in2 = _SIMD32_OFFSET(pSrc + 2);
|
||||||
/*loop Unrolling */
|
|
||||||
blkCnt = blockSize >> 2u;
|
/* negate two samples at a time */
|
||||||
|
in1 = __QSUB16(0, in1);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* negate two samples at a time */
|
||||||
while(blkCnt > 0u)
|
in2 = __QSUB16(0, in2);
|
||||||
{
|
|
||||||
/* C = -A */
|
/* store the result to destination 2 samples at a time */
|
||||||
/* Read two inputs at a time */
|
_SIMD32_OFFSET(pDst) = in1;
|
||||||
in1 = _SIMD32_OFFSET(pSrc);
|
/* store the result to destination 2 samples at a time */
|
||||||
in2 = _SIMD32_OFFSET(pSrc + 2);
|
_SIMD32_OFFSET(pDst + 2) = in2;
|
||||||
|
|
||||||
/* negate two samples at a time */
|
|
||||||
in1 = __QSUB16(0, in1);
|
/* update pointers to process next samples */
|
||||||
|
pSrc += 4U;
|
||||||
/* negate two samples at a time */
|
pDst += 4U;
|
||||||
in2 = __QSUB16(0, in2);
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* store the result to destination 2 samples at a time */
|
blkCnt--;
|
||||||
_SIMD32_OFFSET(pDst) = in1;
|
}
|
||||||
/* store the result to destination 2 samples at a time */
|
|
||||||
_SIMD32_OFFSET(pDst + 2) = in2;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
/* update pointers to process next samples */
|
|
||||||
pSrc += 4u;
|
#else
|
||||||
pDst += 4u;
|
|
||||||
|
/* Run the below code when ARM_MATH_DSP is not defined (e.g. Cortex-M0) */
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
/* Initialize blkCnt with number of samples */
|
||||||
}
|
blkCnt = blockSize;
|
||||||
|
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#else
|
/* C = -A */
|
||||||
|
/* Negate and then store the result in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M0 */
|
in = *pSrc++;
|
||||||
|
*pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
}
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/**
|
||||||
/* C = -A */
|
* @} end of negate group
|
||||||
/* Negate and then store the result in the destination buffer. */
|
*/
|
||||||
in = *pSrc++;
|
|
||||||
*pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of negate group
|
|
||||||
*/
|
|
||||||
|
|
|
@@ -1,129 +1,117 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_negate_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Negates Q31 vectors
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_negate_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Negates Q31 vectors.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup negate
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Negates the elements of a Q31 vector.
|
||||||
|
* @param[in] *pSrc points to the input vector
|
||||||
/**
|
* @param[out] *pDst points to the output vector
|
||||||
* @ingroup groupMath
|
* @param[in] blockSize number of samples in the vector
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @addtogroup negate
|
* \par
|
||||||
* @{
|
* The function uses saturating arithmetic.
|
||||||
*/
|
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @brief Negates the elements of a Q31 vector.
|
void arm_negate_q31(
|
||||||
* @param[in] *pSrc points to the input vector
|
q31_t * pSrc,
|
||||||
* @param[out] *pDst points to the output vector
|
q31_t * pDst,
|
||||||
* @param[in] blockSize number of samples in the vector
|
uint32_t blockSize)
|
||||||
* @return none.
|
{
|
||||||
*
|
q31_t in; /* Temporary variable */
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
|
||||||
*/
|
/* Run the below code when ARM_MATH_DSP is defined (DSP-extension cores such as Cortex-M4) */
|
||||||
|
q31_t in1, in2, in3, in4;
|
||||||
void arm_negate_q31(
|
|
||||||
q31_t * pSrc,
|
/*loop Unrolling */
|
||||||
q31_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
q31_t in; /* Temporary variable */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
uint32_t blkCnt; /* loop counter */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C = -A */
|
||||||
|
/* Negate and then store the results in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
in1 = *pSrc++;
|
||||||
q31_t in1, in2, in3, in4;
|
in2 = *pSrc++;
|
||||||
|
in3 = *pSrc++;
|
||||||
/*loop Unrolling */
|
in4 = *pSrc++;
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
*pDst++ = __QSUB(0, in1);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*pDst++ = __QSUB(0, in2);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
*pDst++ = __QSUB(0, in3);
|
||||||
while(blkCnt > 0u)
|
*pDst++ = __QSUB(0, in4);
|
||||||
{
|
|
||||||
/* C = -A */
|
/* Decrement the loop counter */
|
||||||
/* Negate and then store the results in the destination buffer. */
|
blkCnt--;
|
||||||
in1 = *pSrc++;
|
}
|
||||||
in2 = *pSrc++;
|
|
||||||
in3 = *pSrc++;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
in4 = *pSrc++;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
*pDst++ = __QSUB(0, in1);
|
|
||||||
*pDst++ = __QSUB(0, in2);
|
#else
|
||||||
*pDst++ = __QSUB(0, in3);
|
|
||||||
*pDst++ = __QSUB(0, in4);
|
/* Run the below code when ARM_MATH_DSP is not defined (e.g. Cortex-M0) */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#else
|
/* C = -A */
|
||||||
|
/* Negate and then store the result in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M0 */
|
in = *pSrc++;
|
||||||
|
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/**
|
||||||
{
|
* @} end of negate group
|
||||||
/* C = -A */
|
*/
|
||||||
/* Negate and then store the result in the destination buffer. */
|
|
||||||
in = *pSrc++;
|
|
||||||
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of negate group
|
|
||||||
*/
|
|
||||||
|
|
|
@@ -1,125 +1,113 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_negate_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Negates Q7 vectors
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_negate_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Negates Q7 vectors.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup negate
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Negates the elements of a Q7 vector.
|
||||||
|
* @param[in] *pSrc points to the input vector
|
||||||
/**
|
* @param[out] *pDst points to the output vector
|
||||||
* @ingroup groupMath
|
* @param[in] blockSize number of samples in the vector
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @addtogroup negate
|
* \par
|
||||||
* @{
|
* The function uses saturating arithmetic.
|
||||||
*/
|
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @brief Negates the elements of a Q7 vector.
|
void arm_negate_q7(
|
||||||
* @param[in] *pSrc points to the input vector
|
q7_t * pSrc,
|
||||||
* @param[out] *pDst points to the output vector
|
q7_t * pDst,
|
||||||
* @param[in] blockSize number of samples in the vector
|
uint32_t blockSize)
|
||||||
* @return none.
|
{
|
||||||
*
|
uint32_t blkCnt; /* loop counter */
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q7_t in;
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* The Q7 value -1 (0x80) will be saturated to the maximum allowable positive value 0x7F.
|
|
||||||
*/
|
/* Run the below code when ARM_MATH_DSP is defined (DSP-extension cores such as Cortex-M4) */
|
||||||
|
q31_t input; /* Input values1-4 */
|
||||||
void arm_negate_q7(
|
q31_t zero = 0x00000000;
|
||||||
q7_t * pSrc,
|
|
||||||
q7_t * pDst,
|
|
||||||
uint32_t blockSize)
|
/*loop Unrolling */
|
||||||
{
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blkCnt; /* loop counter */
|
|
||||||
q7_t in;
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* C = -A */
|
||||||
q31_t input; /* Input values1-4 */
|
/* Read four inputs */
|
||||||
q31_t zero = 0x00000000;
|
input = *__SIMD32(pSrc)++;
|
||||||
|
|
||||||
|
/* Store the Negated results in the destination buffer in a single cycle by packing the results */
|
||||||
/*loop Unrolling */
|
*__SIMD32(pDst)++ = __QSUB8(zero, input);
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
blkCnt--;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* C = -A */
|
** No loop unrolling is used. */
|
||||||
/* Read four inputs */
|
blkCnt = blockSize % 0x4U;
|
||||||
input = *__SIMD32(pSrc)++;
|
|
||||||
|
#else
|
||||||
/* Store the Negated results in the destination buffer in a single cycle by packing the results */
|
|
||||||
*__SIMD32(pDst)++ = __QSUB8(zero, input);
|
/* Run the below code when ARM_MATH_DSP is not defined (e.g. Cortex-M0) */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = blockSize % 0x4u;
|
{
|
||||||
|
/* C = -A */
|
||||||
#else
|
/* Negate and then store the results in the destination buffer. */
|
||||||
|
in = *pSrc++;
|
||||||
/* Run the below code for Cortex-M0 */
|
*pDst++ = (in == (q7_t) 0x80) ? 0x7f : -in;
|
||||||
|
|
||||||
/* Initialize blkCnt with number of samples */
|
/* Decrement the loop counter */
|
||||||
blkCnt = blockSize;
|
blkCnt--;
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
}
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/**
|
||||||
{
|
* @} end of negate group
|
||||||
/* C = -A */
|
*/
|
||||||
/* Negate and then store the results in the destination buffer. */
|
|
||||||
in = *pSrc++;
|
|
||||||
*pDst++ = (in == (q7_t) 0x80) ? 0x7f : -in;
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of negate group
|
|
||||||
*/
|
|
||||||
|
|
|
@@ -1,165 +1,154 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_offset_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point vector offset
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_offset_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point vector offset.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup offset Vector Offset
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Adds a constant offset to each element of a vector.
|
||||||
* ---------------------------------------------------------------------------- */
|
*
|
||||||
#include "arm_math.h"
|
* <pre>
|
||||||
|
* pDst[n] = pSrc[n] + offset, 0 <= n < blockSize.
|
||||||
/**
|
* </pre>
|
||||||
* @ingroup groupMath
|
*
|
||||||
*/
|
* The functions support in-place computation allowing the source and
|
||||||
|
* destination pointers to reference the same memory buffer.
|
||||||
/**
|
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||||
* @defgroup offset Vector Offset
|
*/
|
||||||
*
|
|
||||||
* Adds a constant offset to each element of a vector.
|
/**
|
||||||
*
|
* @addtogroup offset
|
||||||
* <pre>
|
* @{
|
||||||
* pDst[n] = pSrc[n] + offset, 0 <= n < blockSize.
|
*/
|
||||||
* </pre>
|
|
||||||
*
|
/**
|
||||||
* The functions support in-place computation allowing the source and
|
* @brief Adds a constant offset to a floating-point vector.
|
||||||
* destination pointers to reference the same memory buffer.
|
* @param[in] *pSrc points to the input vector
|
||||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
* @param[in] offset is the offset to be added
|
||||||
*/
|
* @param[out] *pDst points to the output vector
|
||||||
|
* @param[in] blockSize number of samples in the vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup offset
|
*/
|
||||||
* @{
|
|
||||||
*/
|
|
||||||
|
void arm_offset_f32(
|
||||||
/**
|
float32_t * pSrc,
|
||||||
* @brief Adds a constant offset to a floating-point vector.
|
float32_t offset,
|
||||||
* @param[in] *pSrc points to the input vector
|
float32_t * pDst,
|
||||||
* @param[in] offset is the offset to be added
|
uint32_t blockSize)
|
||||||
* @param[out] *pDst points to the output vector
|
{
|
||||||
* @param[in] blockSize number of samples in the vector
|
uint32_t blkCnt; /* loop counter */
|
||||||
* @return none.
|
|
||||||
*/
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
|
/* Run the below code when ARM_MATH_DSP is defined (DSP-extension cores such as Cortex-M4) */
|
||||||
void arm_offset_f32(
|
float32_t in1, in2, in3, in4;
|
||||||
float32_t * pSrc,
|
|
||||||
float32_t offset,
|
/*loop Unrolling */
|
||||||
float32_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blkCnt; /* loop counter */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* C = A + offset */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Add offset and then store the results in the destination buffer. */
|
||||||
float32_t in1, in2, in3, in4;
|
/* read samples from source */
|
||||||
|
in1 = *pSrc;
|
||||||
/*loop Unrolling */
|
in2 = *(pSrc + 1);
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
/* add offset to input */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
in1 = in1 + offset;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
/* read samples from source */
|
||||||
{
|
in3 = *(pSrc + 2);
|
||||||
/* C = A + offset */
|
|
||||||
/* Add offset and then store the results in the destination buffer. */
|
/* add offset to input */
|
||||||
/* read samples from source */
|
in2 = in2 + offset;
|
||||||
in1 = *pSrc;
|
|
||||||
in2 = *(pSrc + 1);
|
/* read samples from source */
|
||||||
|
in4 = *(pSrc + 3);
|
||||||
/* add offset to input */
|
|
||||||
in1 = in1 + offset;
|
/* add offset to input */
|
||||||
|
in3 = in3 + offset;
|
||||||
/* read samples from source */
|
|
||||||
in3 = *(pSrc + 2);
|
/* store result to destination */
|
||||||
|
*pDst = in1;
|
||||||
/* add offset to input */
|
|
||||||
in2 = in2 + offset;
|
/* add offset to input */
|
||||||
|
in4 = in4 + offset;
|
||||||
/* read samples from source */
|
|
||||||
in4 = *(pSrc + 3);
|
/* store result to destination */
|
||||||
|
*(pDst + 1) = in2;
|
||||||
/* add offset to input */
|
|
||||||
in3 = in3 + offset;
|
/* store result to destination */
|
||||||
|
*(pDst + 2) = in3;
|
||||||
/* store result to destination */
|
|
||||||
*pDst = in1;
|
/* store result to destination */
|
||||||
|
*(pDst + 3) = in4;
|
||||||
/* add offset to input */
|
|
||||||
in4 = in4 + offset;
|
/* update pointers to process next samples */
|
||||||
|
pSrc += 4U;
|
||||||
/* store result to destination */
|
pDst += 4U;
|
||||||
*(pDst + 1) = in2;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* store result to destination */
|
blkCnt--;
|
||||||
*(pDst + 2) = in3;
|
}
|
||||||
|
|
||||||
/* store result to destination */
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
*(pDst + 3) = in4;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
/* update pointers to process next samples */
|
|
||||||
pSrc += 4u;
|
#else
|
||||||
pDst += 4u;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
/* Initialize blkCnt with number of samples */
|
||||||
}
|
blkCnt = blockSize;
|
||||||
|
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#else
|
/* C = A + offset */
|
||||||
|
/* Add offset and then store the result in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M0 */
|
*pDst++ = (*pSrc++) + offset;
|
||||||
|
|
||||||
/* Initialize blkCnt with number of samples */
|
/* Decrement the loop counter */
|
||||||
blkCnt = blockSize;
|
blkCnt--;
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
}
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/**
|
||||||
{
|
* @} end of offset group
|
||||||
/* C = A + offset */
|
*/
|
||||||
/* Add offset and then store the result in the destination buffer. */
|
|
||||||
*pDst++ = (*pSrc++) + offset;
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of offset group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,136 +1,124 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_offset_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 vector offset
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_offset_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 vector offset.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup offset
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Adds a constant offset to a Q15 vector.
|
||||||
|
* @param[in] *pSrc points to the input vector
|
||||||
/**
|
* @param[in] offset is the offset to be added
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in the vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup offset
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Adds a constant offset to a Q15 vector.
|
|
||||||
* @param[in] *pSrc points to the input vector
|
void arm_offset_q15(
|
||||||
* @param[in] offset is the offset to be added
|
q15_t * pSrc,
|
||||||
* @param[out] *pDst points to the output vector
|
q15_t offset,
|
||||||
* @param[in] blockSize number of samples in the vector
|
q15_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] are saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q31_t offset_packed; /* Offset packed to 32 bit */
|
||||||
void arm_offset_q15(
|
|
||||||
q15_t * pSrc,
|
|
||||||
q15_t offset,
|
/*loop Unrolling */
|
||||||
q15_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
||||||
uint32_t blkCnt; /* loop counter */
|
offset_packed = __PKHBT(offset, offset, 16);
|
||||||
|
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
while (blkCnt > 0U)
|
||||||
q31_t offset_packed; /* Offset packed to 32 bit */
|
{
|
||||||
|
/* C = A + offset */
|
||||||
|
/* Add offset and then store the results in the destination buffer, 2 samples at a time. */
|
||||||
/*loop Unrolling */
|
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
|
||||||
blkCnt = blockSize >> 2u;
|
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
|
||||||
|
|
||||||
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
/* Decrement the loop counter */
|
||||||
offset_packed = __PKHBT(offset, offset, 16);
|
blkCnt--;
|
||||||
|
}
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
while(blkCnt > 0u)
|
** No loop unrolling is used. */
|
||||||
{
|
blkCnt = blockSize % 0x4U;
|
||||||
/* C = A + offset */
|
|
||||||
/* Add offset and then store the results in the destination buffer, 2 samples at a time. */
|
while (blkCnt > 0U)
|
||||||
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
|
{
|
||||||
*__SIMD32(pDst)++ = __QADD16(*__SIMD32(pSrc)++, offset_packed);
|
/* C = A + offset */
|
||||||
|
/* Add offset and then store the results in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = (q15_t) __QADD16(*pSrc++, offset);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A + offset */
|
/* Initialize blkCnt with number of samples */
|
||||||
/* Add offset and then store the results in the destination buffer. */
|
blkCnt = blockSize;
|
||||||
*pDst++ = (q15_t) __QADD16(*pSrc++, offset);
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Decrement the loop counter */
|
{
|
||||||
blkCnt--;
|
/* C = A + offset */
|
||||||
}
|
/* Add offset and then store the results in the destination buffer. */
|
||||||
|
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrc++ + offset), 16);
|
||||||
#else
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Run the below code for Cortex-M0 */
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A + offset */
|
/**
|
||||||
/* Add offset and then store the results in the destination buffer. */
|
* @} end of offset group
|
||||||
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrc++ + offset), 16);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of offset group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,140 +1,128 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_offset_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 vector offset
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_offset_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 vector offset.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup offset
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Adds a constant offset to a Q31 vector.
|
||||||
|
* @param[in] *pSrc points to the input vector
|
||||||
/**
|
* @param[in] offset is the offset to be added
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in the vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup offset
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Adds a constant offset to a Q31 vector.
|
|
||||||
* @param[in] *pSrc points to the input vector
|
void arm_offset_q31(
|
||||||
* @param[in] offset is the offset to be added
|
q31_t * pSrc,
|
||||||
* @param[out] *pDst points to the output vector
|
q31_t offset,
|
||||||
* @param[in] blockSize number of samples in the vector
|
q31_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] are saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q31_t in1, in2, in3, in4;
|
||||||
void arm_offset_q31(
|
|
||||||
q31_t * pSrc,
|
|
||||||
q31_t offset,
|
/*loop Unrolling */
|
||||||
q31_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blkCnt; /* loop counter */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* C = A + offset */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Add offset and then store the results in the destination buffer. */
|
||||||
q31_t in1, in2, in3, in4;
|
in1 = *pSrc++;
|
||||||
|
in2 = *pSrc++;
|
||||||
|
in3 = *pSrc++;
|
||||||
/*loop Unrolling */
|
in4 = *pSrc++;
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
*pDst++ = __QADD(in1, offset);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*pDst++ = __QADD(in2, offset);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
*pDst++ = __QADD(in3, offset);
|
||||||
while(blkCnt > 0u)
|
*pDst++ = __QADD(in4, offset);
|
||||||
{
|
|
||||||
/* C = A + offset */
|
/* Decrement the loop counter */
|
||||||
/* Add offset and then store the results in the destination buffer. */
|
blkCnt--;
|
||||||
in1 = *pSrc++;
|
}
|
||||||
in2 = *pSrc++;
|
|
||||||
in3 = *pSrc++;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
in4 = *pSrc++;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
*pDst++ = __QADD(in1, offset);
|
|
||||||
*pDst++ = __QADD(in2, offset);
|
while (blkCnt > 0U)
|
||||||
*pDst++ = __QADD(in3, offset);
|
{
|
||||||
*pDst++ = __QADD(in4, offset);
|
/* C = A + offset */
|
||||||
|
/* Add offset and then store the result in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = __QADD(*pSrc++, offset);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A + offset */
|
/* Initialize blkCnt with number of samples */
|
||||||
/* Add offset and then store the result in the destination buffer. */
|
blkCnt = blockSize;
|
||||||
*pDst++ = __QADD(*pSrc++, offset);
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Decrement the loop counter */
|
{
|
||||||
blkCnt--;
|
/* C = A + offset */
|
||||||
}
|
/* Add offset and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
|
||||||
#else
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Run the below code for Cortex-M0 */
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A + offset */
|
/**
|
||||||
/* Add offset and then store the result in the destination buffer. */
|
* @} end of offset group
|
||||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrc++ + offset);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of offset group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,135 +1,123 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_offset_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q7 vector offset
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_offset_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q7 vector offset.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup offset
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Adds a constant offset to a Q7 vector.
|
||||||
|
* @param[in] *pSrc points to the input vector
|
||||||
/**
|
* @param[in] offset is the offset to be added
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in the vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup offset
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Adds a constant offset to a Q7 vector.
|
|
||||||
* @param[in] *pSrc points to the input vector
|
void arm_offset_q7(
|
||||||
* @param[in] offset is the offset to be added
|
q7_t * pSrc,
|
||||||
* @param[out] *pDst points to the output vector
|
q7_t offset,
|
||||||
* @param[in] blockSize number of samples in the vector
|
q7_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q7 range [0x80 0x7F] are saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
q31_t offset_packed; /* Offset packed to 32 bit */
|
||||||
void arm_offset_q7(
|
|
||||||
q7_t * pSrc,
|
|
||||||
q7_t offset,
|
/*loop Unrolling */
|
||||||
q7_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
||||||
uint32_t blkCnt; /* loop counter */
|
offset_packed = __PACKq7(offset, offset, offset, offset);
|
||||||
|
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
while (blkCnt > 0U)
|
||||||
q31_t offset_packed; /* Offset packed to 32 bit */
|
{
|
||||||
|
/* C = A + offset */
|
||||||
|
/* Add offset and then store the results in the destination buffer, 4 samples at a time. */
|
||||||
/*loop Unrolling */
|
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrc)++, offset_packed);
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Offset is packed to 32 bit in order to use SIMD32 for addition */
|
blkCnt--;
|
||||||
offset_packed = __PACKq7(offset, offset, offset, offset);
|
}
|
||||||
|
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
** No loop unrolling is used. */
|
||||||
while(blkCnt > 0u)
|
blkCnt = blockSize % 0x4U;
|
||||||
{
|
|
||||||
/* C = A + offset */
|
while (blkCnt > 0U)
|
||||||
/* Add offset and then store the results in the destination buffer, 4 samples at a time. */
|
{
|
||||||
*__SIMD32(pDst)++ = __QADD8(*__SIMD32(pSrc)++, offset_packed);
|
/* C = A + offset */
|
||||||
|
/* Add offset and then store the result in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A + offset */
|
/* Initialize blkCnt with number of samples */
|
||||||
/* Add offset and then store the result in the destination buffer. */
|
blkCnt = blockSize;
|
||||||
*pDst++ = (q7_t) __SSAT(*pSrc++ + offset, 8);
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Decrement the loop counter */
|
{
|
||||||
blkCnt--;
|
/* C = A + offset */
|
||||||
}
|
/* Add offset and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (q7_t) __SSAT((q15_t) * pSrc++ + offset, 8);
|
||||||
#else
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Run the below code for Cortex-M0 */
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A + offset */
|
/**
|
||||||
/* Add offset and then store the result in the destination buffer. */
|
* @} end of offset group
|
||||||
*pDst++ = (q7_t) __SSAT((q15_t) * pSrc++ + offset, 8);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of offset group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,169 +1,157 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_scale_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Multiplies a floating-point vector by a scalar
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_scale_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Multiplies a floating-point vector by a scalar.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup scale Vector Scale
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
|
||||||
* ---------------------------------------------------------------------------- */
|
*
|
||||||
|
* <pre>
|
||||||
#include "arm_math.h"
|
* pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
|
||||||
|
* </pre>
|
||||||
/**
|
*
|
||||||
* @ingroup groupMath
|
* In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
|
||||||
*/
|
* a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
|
||||||
|
* The shift allows the gain of the scaling operation to exceed 1.0.
|
||||||
/**
|
* The algorithm used with fixed-point data is:
|
||||||
* @defgroup scale Vector Scale
|
*
|
||||||
*
|
* <pre>
|
||||||
* Multiply a vector by a scalar value. For floating-point data, the algorithm used is:
|
* pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
|
||||||
*
|
* </pre>
|
||||||
* <pre>
|
*
|
||||||
* pDst[n] = pSrc[n] * scale, 0 <= n < blockSize.
|
* The overall scale factor applied to the fixed-point data is
|
||||||
* </pre>
|
* <pre>
|
||||||
*
|
* scale = scaleFract * 2^shift.
|
||||||
* In the fixed-point Q7, Q15, and Q31 functions, <code>scale</code> is represented by
|
* </pre>
|
||||||
* a fractional multiplication <code>scaleFract</code> and an arithmetic shift <code>shift</code>.
|
*
|
||||||
* The shift allows the gain of the scaling operation to exceed 1.0.
|
* The functions support in-place computation allowing the source and destination
|
||||||
* The algorithm used with fixed-point data is:
|
* pointers to reference the same memory buffer.
|
||||||
*
|
*/
|
||||||
* <pre>
|
|
||||||
* pDst[n] = (pSrc[n] * scaleFract) << shift, 0 <= n < blockSize.
|
/**
|
||||||
* </pre>
|
* @addtogroup scale
|
||||||
*
|
* @{
|
||||||
* The overall scale factor applied to the fixed-point data is
|
*/
|
||||||
* <pre>
|
|
||||||
* scale = scaleFract * 2^shift.
|
/**
|
||||||
* </pre>
|
* @brief Multiplies a floating-point vector by a scalar.
|
||||||
*
|
* @param[in] *pSrc points to the input vector
|
||||||
* The functions support in-place computation allowing the source and destination
|
* @param[in] scale scale factor to be applied
|
||||||
* pointers to reference the same memory buffer.
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in the vector
|
||||||
|
* @return none.
|
||||||
/**
|
*/
|
||||||
* @addtogroup scale
|
|
||||||
* @{
|
|
||||||
*/
|
void arm_scale_f32(
|
||||||
|
float32_t * pSrc,
|
||||||
/**
|
float32_t scale,
|
||||||
* @brief Multiplies a floating-point vector by a scalar.
|
float32_t * pDst,
|
||||||
* @param[in] *pSrc points to the input vector
|
uint32_t blockSize)
|
||||||
* @param[in] scale scale factor to be applied
|
{
|
||||||
* @param[out] *pDst points to the output vector
|
uint32_t blkCnt; /* loop counter */
|
||||||
* @param[in] blockSize number of samples in the vector
|
#if defined (ARM_MATH_DSP)
|
||||||
* @return none.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
float32_t in1, in2, in3, in4; /* temporary variables */
|
||||||
|
|
||||||
void arm_scale_f32(
|
/*loop Unrolling */
|
||||||
float32_t * pSrc,
|
blkCnt = blockSize >> 2U;
|
||||||
float32_t scale,
|
|
||||||
float32_t * pDst,
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blockSize)
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
{
|
while (blkCnt > 0U)
|
||||||
uint32_t blkCnt; /* loop counter */
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C = A * scale */
|
||||||
|
/* Scale the input and then store the results in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* read input samples from source */
|
||||||
float32_t in1, in2, in3, in4; /* temporary variables */
|
in1 = *pSrc;
|
||||||
|
in2 = *(pSrc + 1);
|
||||||
/*loop Unrolling */
|
|
||||||
blkCnt = blockSize >> 2u;
|
/* multiply with scaling factor */
|
||||||
|
in1 = in1 * scale;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* read input sample from source */
|
||||||
while(blkCnt > 0u)
|
in3 = *(pSrc + 2);
|
||||||
{
|
|
||||||
/* C = A * scale */
|
/* multiply with scaling factor */
|
||||||
/* Scale the input and then store the results in the destination buffer. */
|
in2 = in2 * scale;
|
||||||
/* read input samples from source */
|
|
||||||
in1 = *pSrc;
|
/* read input sample from source */
|
||||||
in2 = *(pSrc + 1);
|
in4 = *(pSrc + 3);
|
||||||
|
|
||||||
/* multiply with scaling factor */
|
/* multiply with scaling factor */
|
||||||
in1 = in1 * scale;
|
in3 = in3 * scale;
|
||||||
|
in4 = in4 * scale;
|
||||||
/* read input sample from source */
|
/* store the result to destination */
|
||||||
in3 = *(pSrc + 2);
|
*pDst = in1;
|
||||||
|
*(pDst + 1) = in2;
|
||||||
/* multiply with scaling factor */
|
*(pDst + 2) = in3;
|
||||||
in2 = in2 * scale;
|
*(pDst + 3) = in4;
|
||||||
|
|
||||||
/* read input sample from source */
|
/* update pointers to process next samples */
|
||||||
in4 = *(pSrc + 3);
|
pSrc += 4U;
|
||||||
|
pDst += 4U;
|
||||||
/* multiply with scaling factor */
|
|
||||||
in3 = in3 * scale;
|
/* Decrement the loop counter */
|
||||||
in4 = in4 * scale;
|
blkCnt--;
|
||||||
/* store the result to destination */
|
}
|
||||||
*pDst = in1;
|
|
||||||
*(pDst + 1) = in2;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
*(pDst + 2) = in3;
|
** No loop unrolling is used. */
|
||||||
*(pDst + 3) = in4;
|
blkCnt = blockSize % 0x4U;
|
||||||
|
|
||||||
/* update pointers to process next samples */
|
#else
|
||||||
pSrc += 4u;
|
|
||||||
pDst += 4u;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = blockSize % 0x4u;
|
{
|
||||||
|
/* C = A * scale */
|
||||||
#else
|
/* Scale the input and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (*pSrc++) * scale;
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Initialize blkCnt with number of samples */
|
blkCnt--;
|
||||||
blkCnt = blockSize;
|
}
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
/**
|
||||||
while(blkCnt > 0u)
|
* @} end of scale group
|
||||||
{
|
*/
|
||||||
/* C = A * scale */
|
|
||||||
/* Scale the input and then store the result in the destination buffer. */
|
|
||||||
*pDst++ = (*pSrc++) * scale;
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of scale group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,162 +1,150 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_scale_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Multiplies a Q15 vector by a scalar
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_scale_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Multiplies a Q15 vector by a scalar.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup scale
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Multiplies a Q15 vector by a scalar.
|
||||||
|
* @param[in] *pSrc points to the input vector
|
||||||
/**
|
* @param[in] scaleFract fractional portion of the scale value
|
||||||
* @ingroup groupMath
|
* @param[in] shift number of bits to shift the result by
|
||||||
*/
|
* @param[out] *pDst points to the output vector
|
||||||
|
* @param[in] blockSize number of samples in the vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup scale
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
|
||||||
/**
|
* These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
|
||||||
* @brief Multiplies a Q15 vector by a scalar.
|
*/
|
||||||
* @param[in] *pSrc points to the input vector
|
|
||||||
* @param[in] scaleFract fractional portion of the scale value
|
|
||||||
* @param[in] shift number of bits to shift the result by
|
void arm_scale_q15(
|
||||||
* @param[out] *pDst points to the output vector
|
q15_t * pSrc,
|
||||||
* @param[in] blockSize number of samples in the vector
|
q15_t scaleFract,
|
||||||
* @return none.
|
int8_t shift,
|
||||||
*
|
q15_t * pDst,
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blockSize)
|
||||||
* \par
|
{
|
||||||
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.15 format.
|
int8_t kShift = 15 - shift; /* shift to apply after scaling */
|
||||||
* These are multiplied to yield a 2.30 intermediate result and this is shifted with saturation to 1.15 format.
|
uint32_t blkCnt; /* loop counter */
|
||||||
*/
|
|
||||||
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
void arm_scale_q15(
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
q15_t * pSrc,
|
q15_t in1, in2, in3, in4;
|
||||||
q15_t scaleFract,
|
q31_t inA1, inA2; /* Temporary variables */
|
||||||
int8_t shift,
|
q31_t out1, out2, out3, out4;
|
||||||
q15_t * pDst,
|
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/*loop Unrolling */
|
||||||
int8_t kShift = 15 - shift; /* shift to apply after scaling */
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blkCnt; /* loop counter */
|
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
{
|
||||||
q15_t in1, in2, in3, in4;
|
/* Reading 2 inputs from memory */
|
||||||
q31_t inA1, inA2; /* Temporary variables */
|
inA1 = *__SIMD32(pSrc)++;
|
||||||
q31_t out1, out2, out3, out4;
|
inA2 = *__SIMD32(pSrc)++;
|
||||||
|
|
||||||
|
/* C = A * scale */
|
||||||
/*loop Unrolling */
|
/* Scale the inputs and then store the 2 results in the destination buffer
|
||||||
blkCnt = blockSize >> 2u;
|
* in single cycle by packing the outputs */
|
||||||
|
out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
out2 = (q31_t) ((q15_t) inA1 * scaleFract);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
|
||||||
while(blkCnt > 0u)
|
out4 = (q31_t) ((q15_t) inA2 * scaleFract);
|
||||||
{
|
|
||||||
/* Reading 2 inputs from memory */
|
/* apply shifting */
|
||||||
inA1 = *__SIMD32(pSrc)++;
|
out1 = out1 >> kShift;
|
||||||
inA2 = *__SIMD32(pSrc)++;
|
out2 = out2 >> kShift;
|
||||||
|
out3 = out3 >> kShift;
|
||||||
/* C = A * scale */
|
out4 = out4 >> kShift;
|
||||||
/* Scale the inputs and then store the 2 results in the destination buffer
|
|
||||||
* in single cycle by packing the outputs */
|
/* saturate the output */
|
||||||
out1 = (q31_t) ((q15_t) (inA1 >> 16) * scaleFract);
|
in1 = (q15_t) (__SSAT(out1, 16));
|
||||||
out2 = (q31_t) ((q15_t) inA1 * scaleFract);
|
in2 = (q15_t) (__SSAT(out2, 16));
|
||||||
out3 = (q31_t) ((q15_t) (inA2 >> 16) * scaleFract);
|
in3 = (q15_t) (__SSAT(out3, 16));
|
||||||
out4 = (q31_t) ((q15_t) inA2 * scaleFract);
|
in4 = (q15_t) (__SSAT(out4, 16));
|
||||||
|
|
||||||
/* apply shifting */
|
/* store the result to destination */
|
||||||
out1 = out1 >> kShift;
|
*__SIMD32(pDst)++ = __PKHBT(in2, in1, 16);
|
||||||
out2 = out2 >> kShift;
|
*__SIMD32(pDst)++ = __PKHBT(in4, in3, 16);
|
||||||
out3 = out3 >> kShift;
|
|
||||||
out4 = out4 >> kShift;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* saturate the output */
|
}
|
||||||
in1 = (q15_t) (__SSAT(out1, 16));
|
|
||||||
in2 = (q15_t) (__SSAT(out2, 16));
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
in3 = (q15_t) (__SSAT(out3, 16));
|
** No loop unrolling is used. */
|
||||||
in4 = (q15_t) (__SSAT(out4, 16));
|
blkCnt = blockSize % 0x4U;
|
||||||
|
|
||||||
/* store the result to destination */
|
while (blkCnt > 0U)
|
||||||
*__SIMD32(pDst)++ = __PKHBT(in2, in1, 16);
|
{
|
||||||
*__SIMD32(pDst)++ = __PKHBT(in4, in3, 16);
|
/* C = A * scale */
|
||||||
|
/* Scale the input and then store the result in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = (q15_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 16));
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A * scale */
|
/* Initialize blkCnt with number of samples */
|
||||||
/* Scale the input and then store the result in the destination buffer. */
|
blkCnt = blockSize;
|
||||||
*pDst++ = (q15_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 16));
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Decrement the loop counter */
|
{
|
||||||
blkCnt--;
|
/* C = A * scale */
|
||||||
}
|
/* Scale the input and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (q15_t) (__SSAT(((q31_t) * pSrc++ * scaleFract) >> kShift, 16));
|
||||||
#else
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Run the below code for Cortex-M0 */
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A * scale */
|
/**
|
||||||
/* Scale the input and then store the result in the destination buffer. */
|
* @} end of scale group
|
||||||
*pDst++ = (q15_t) (__SSAT(((q31_t) * pSrc++ * scaleFract) >> kShift, 16));
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of scale group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,239 +1,227 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_scale_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Multiplies a Q31 vector by a scalar
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_scale_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Multiplies a Q31 vector by a scalar.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup scale
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Multiplies a Q31 vector by a scalar.
|
||||||
|
* @param[in] *pSrc points to the input vector
|
||||||
/**
|
* @param[in] scaleFract fractional portion of the scale value
|
||||||
* @ingroup groupMath
|
* @param[in] shift number of bits to shift the result by
|
||||||
*/
|
* @param[out] *pDst points to the output vector
|
||||||
|
* @param[in] blockSize number of samples in the vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup scale
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
|
||||||
/**
|
* These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
|
||||||
* @brief Multiplies a Q31 vector by a scalar.
|
*/
|
||||||
* @param[in] *pSrc points to the input vector
|
|
||||||
* @param[in] scaleFract fractional portion of the scale value
|
void arm_scale_q31(
|
||||||
* @param[in] shift number of bits to shift the result by
|
q31_t * pSrc,
|
||||||
* @param[out] *pDst points to the output vector
|
q31_t scaleFract,
|
||||||
* @param[in] blockSize number of samples in the vector
|
int8_t shift,
|
||||||
* @return none.
|
q31_t * pDst,
|
||||||
*
|
uint32_t blockSize)
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
{
|
||||||
* \par
|
int8_t kShift = shift + 1; /* Shift to apply after scaling */
|
||||||
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.31 format.
|
int8_t sign = (kShift & 0x80);
|
||||||
* These are multiplied to yield a 2.62 intermediate result and this is shifted with saturation to 1.31 format.
|
uint32_t blkCnt; /* loop counter */
|
||||||
*/
|
q31_t in, out;
|
||||||
|
|
||||||
void arm_scale_q31(
|
#if defined (ARM_MATH_DSP)
|
||||||
q31_t * pSrc,
|
|
||||||
q31_t scaleFract,
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
int8_t shift,
|
|
||||||
q31_t * pDst,
|
q31_t in1, in2, in3, in4; /* temporary input variables */
|
||||||
uint32_t blockSize)
|
q31_t out1, out2, out3, out4; /* temporary output variables */
|
||||||
{
|
|
||||||
int8_t kShift = shift + 1; /* Shift to apply after scaling */
|
|
||||||
int8_t sign = (kShift & 0x80);
|
/*loop Unrolling */
|
||||||
uint32_t blkCnt; /* loop counter */
|
blkCnt = blockSize >> 2U;
|
||||||
q31_t in, out;
|
|
||||||
|
if (sign == 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
q31_t in1, in2, in3, in4; /* temporary input variables */
|
{
|
||||||
q31_t out1, out2, out3, out4; /* temporary output variables */
|
/* read four inputs from source */
|
||||||
|
in1 = *pSrc;
|
||||||
|
in2 = *(pSrc + 1);
|
||||||
/*loop Unrolling */
|
in3 = *(pSrc + 2);
|
||||||
blkCnt = blockSize >> 2u;
|
in4 = *(pSrc + 3);
|
||||||
|
|
||||||
if(sign == 0u)
|
/* multiply input with scalar value */
|
||||||
{
|
in1 = ((q63_t) in1 * scaleFract) >> 32;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
in2 = ((q63_t) in2 * scaleFract) >> 32;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
in3 = ((q63_t) in3 * scaleFract) >> 32;
|
||||||
while(blkCnt > 0u)
|
in4 = ((q63_t) in4 * scaleFract) >> 32;
|
||||||
{
|
|
||||||
/* read four inputs from source */
|
/* apply shifting */
|
||||||
in1 = *pSrc;
|
out1 = in1 << kShift;
|
||||||
in2 = *(pSrc + 1);
|
out2 = in2 << kShift;
|
||||||
in3 = *(pSrc + 2);
|
|
||||||
in4 = *(pSrc + 3);
|
/* saturate the results. */
|
||||||
|
if (in1 != (out1 >> kShift))
|
||||||
/* multiply input with scalar value */
|
out1 = 0x7FFFFFFF ^ (in1 >> 31);
|
||||||
in1 = ((q63_t) in1 * scaleFract) >> 32;
|
|
||||||
in2 = ((q63_t) in2 * scaleFract) >> 32;
|
if (in2 != (out2 >> kShift))
|
||||||
in3 = ((q63_t) in3 * scaleFract) >> 32;
|
out2 = 0x7FFFFFFF ^ (in2 >> 31);
|
||||||
in4 = ((q63_t) in4 * scaleFract) >> 32;
|
|
||||||
|
out3 = in3 << kShift;
|
||||||
/* apply shifting */
|
out4 = in4 << kShift;
|
||||||
out1 = in1 << kShift;
|
|
||||||
out2 = in2 << kShift;
|
*pDst = out1;
|
||||||
|
*(pDst + 1) = out2;
|
||||||
/* saturate the results. */
|
|
||||||
if(in1 != (out1 >> kShift))
|
if (in3 != (out3 >> kShift))
|
||||||
out1 = 0x7FFFFFFF ^ (in1 >> 31);
|
out3 = 0x7FFFFFFF ^ (in3 >> 31);
|
||||||
|
|
||||||
if(in2 != (out2 >> kShift))
|
if (in4 != (out4 >> kShift))
|
||||||
out2 = 0x7FFFFFFF ^ (in2 >> 31);
|
out4 = 0x7FFFFFFF ^ (in4 >> 31);
|
||||||
|
|
||||||
out3 = in3 << kShift;
|
/* Store result destination */
|
||||||
out4 = in4 << kShift;
|
*(pDst + 2) = out3;
|
||||||
|
*(pDst + 3) = out4;
|
||||||
*pDst = out1;
|
|
||||||
*(pDst + 1) = out2;
|
/* Update pointers to process next samples */
|
||||||
|
pSrc += 4U;
|
||||||
if(in3 != (out3 >> kShift))
|
pDst += 4U;
|
||||||
out3 = 0x7FFFFFFF ^ (in3 >> 31);
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
if(in4 != (out4 >> kShift))
|
blkCnt--;
|
||||||
out4 = 0x7FFFFFFF ^ (in4 >> 31);
|
}
|
||||||
|
|
||||||
/* Store result destination */
|
}
|
||||||
*(pDst + 2) = out3;
|
else
|
||||||
*(pDst + 3) = out4;
|
{
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
/* Update pointers to process next samples */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
pSrc += 4u;
|
while (blkCnt > 0U)
|
||||||
pDst += 4u;
|
{
|
||||||
|
/* read four inputs from source */
|
||||||
/* Decrement the loop counter */
|
in1 = *pSrc;
|
||||||
blkCnt--;
|
in2 = *(pSrc + 1);
|
||||||
}
|
in3 = *(pSrc + 2);
|
||||||
|
in4 = *(pSrc + 3);
|
||||||
}
|
|
||||||
else
|
/* multiply input with scalar value */
|
||||||
{
|
in1 = ((q63_t) in1 * scaleFract) >> 32;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
in2 = ((q63_t) in2 * scaleFract) >> 32;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
in3 = ((q63_t) in3 * scaleFract) >> 32;
|
||||||
while(blkCnt > 0u)
|
in4 = ((q63_t) in4 * scaleFract) >> 32;
|
||||||
{
|
|
||||||
/* read four inputs from source */
|
/* apply shifting */
|
||||||
in1 = *pSrc;
|
out1 = in1 >> -kShift;
|
||||||
in2 = *(pSrc + 1);
|
out2 = in2 >> -kShift;
|
||||||
in3 = *(pSrc + 2);
|
|
||||||
in4 = *(pSrc + 3);
|
out3 = in3 >> -kShift;
|
||||||
|
out4 = in4 >> -kShift;
|
||||||
/* multiply input with scalar value */
|
|
||||||
in1 = ((q63_t) in1 * scaleFract) >> 32;
|
/* Store result destination */
|
||||||
in2 = ((q63_t) in2 * scaleFract) >> 32;
|
*pDst = out1;
|
||||||
in3 = ((q63_t) in3 * scaleFract) >> 32;
|
*(pDst + 1) = out2;
|
||||||
in4 = ((q63_t) in4 * scaleFract) >> 32;
|
|
||||||
|
*(pDst + 2) = out3;
|
||||||
/* apply shifting */
|
*(pDst + 3) = out4;
|
||||||
out1 = in1 >> -kShift;
|
|
||||||
out2 = in2 >> -kShift;
|
/* Update pointers to process next samples */
|
||||||
|
pSrc += 4U;
|
||||||
out3 = in3 >> -kShift;
|
pDst += 4U;
|
||||||
out4 = in4 >> -kShift;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Store result destination */
|
blkCnt--;
|
||||||
*pDst = out1;
|
}
|
||||||
*(pDst + 1) = out2;
|
}
|
||||||
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
*(pDst + 2) = out3;
|
** No loop unrolling is used. */
|
||||||
*(pDst + 3) = out4;
|
blkCnt = blockSize % 0x4U;
|
||||||
|
|
||||||
/* Update pointers to process next samples */
|
#else
|
||||||
pSrc += 4u;
|
|
||||||
pDst += 4u;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
}
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
if (sign == 0)
|
||||||
blkCnt = blockSize % 0x4u;
|
{
|
||||||
|
while (blkCnt > 0U)
|
||||||
#else
|
{
|
||||||
|
/* C = A * scale */
|
||||||
/* Run the below code for Cortex-M0 */
|
/* Scale the input and then store the result in the destination buffer. */
|
||||||
|
in = *pSrc++;
|
||||||
/* Initialize blkCnt with number of samples */
|
in = ((q63_t) in * scaleFract) >> 32;
|
||||||
blkCnt = blockSize;
|
|
||||||
|
out = in << kShift;
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
if (in != (out >> kShift))
|
||||||
if(sign == 0)
|
out = 0x7FFFFFFF ^ (in >> 31);
|
||||||
{
|
|
||||||
while(blkCnt > 0u)
|
*pDst++ = out;
|
||||||
{
|
|
||||||
/* C = A * scale */
|
/* Decrement the loop counter */
|
||||||
/* Scale the input and then store the result in the destination buffer. */
|
blkCnt--;
|
||||||
in = *pSrc++;
|
}
|
||||||
in = ((q63_t) in * scaleFract) >> 32;
|
}
|
||||||
|
else
|
||||||
out = in << kShift;
|
{
|
||||||
|
while (blkCnt > 0U)
|
||||||
if(in != (out >> kShift))
|
{
|
||||||
out = 0x7FFFFFFF ^ (in >> 31);
|
/* C = A * scale */
|
||||||
|
/* Scale the input and then store the result in the destination buffer. */
|
||||||
*pDst++ = out;
|
in = *pSrc++;
|
||||||
|
in = ((q63_t) in * scaleFract) >> 32;
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
out = in >> -kShift;
|
||||||
}
|
|
||||||
}
|
*pDst++ = out;
|
||||||
else
|
|
||||||
{
|
/* Decrement the loop counter */
|
||||||
while(blkCnt > 0u)
|
blkCnt--;
|
||||||
{
|
}
|
||||||
/* C = A * scale */
|
|
||||||
/* Scale the input and then store the result in the destination buffer. */
|
}
|
||||||
in = *pSrc++;
|
}
|
||||||
in = ((q63_t) in * scaleFract) >> 32;
|
|
||||||
|
/**
|
||||||
out = in >> -kShift;
|
* @} end of scale group
|
||||||
|
*/
|
||||||
*pDst++ = out;
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of scale group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,149 +1,137 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_scale_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Multiplies a Q7 vector by a scalar
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_scale_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Multiplies a Q7 vector by a scalar.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup scale
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Multiplies a Q7 vector by a scalar.
|
||||||
|
* @param[in] *pSrc points to the input vector
|
||||||
/**
|
* @param[in] scaleFract fractional portion of the scale value
|
||||||
* @ingroup groupMath
|
* @param[in] shift number of bits to shift the result by
|
||||||
*/
|
* @param[out] *pDst points to the output vector
|
||||||
|
* @param[in] blockSize number of samples in the vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup scale
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.
|
||||||
/**
|
* These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
|
||||||
* @brief Multiplies a Q7 vector by a scalar.
|
*/
|
||||||
* @param[in] *pSrc points to the input vector
|
|
||||||
* @param[in] scaleFract fractional portion of the scale value
|
void arm_scale_q7(
|
||||||
* @param[in] shift number of bits to shift the result by
|
q7_t * pSrc,
|
||||||
* @param[out] *pDst points to the output vector
|
q7_t scaleFract,
|
||||||
* @param[in] blockSize number of samples in the vector
|
int8_t shift,
|
||||||
* @return none.
|
q7_t * pDst,
|
||||||
*
|
uint32_t blockSize)
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
{
|
||||||
* \par
|
int8_t kShift = 7 - shift; /* shift to apply after scaling */
|
||||||
* The input data <code>*pSrc</code> and <code>scaleFract</code> are in 1.7 format.
|
uint32_t blkCnt; /* loop counter */
|
||||||
* These are multiplied to yield a 2.14 intermediate result and this is shifted with saturation to 1.7 format.
|
|
||||||
*/
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
void arm_scale_q7(
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
q7_t * pSrc,
|
q7_t in1, in2, in3, in4, out1, out2, out3, out4; /* Temporary variables to store input & output */
|
||||||
q7_t scaleFract,
|
|
||||||
int8_t shift,
|
|
||||||
q7_t * pDst,
|
/*loop Unrolling */
|
||||||
uint32_t blockSize)
|
blkCnt = blockSize >> 2U;
|
||||||
{
|
|
||||||
int8_t kShift = 7 - shift; /* shift to apply after scaling */
|
|
||||||
uint32_t blkCnt; /* loop counter */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Reading 4 inputs from memory */
|
||||||
q7_t in1, in2, in3, in4, out1, out2, out3, out4; /* Temporary variables to store input & output */
|
in1 = *pSrc++;
|
||||||
|
in2 = *pSrc++;
|
||||||
|
in3 = *pSrc++;
|
||||||
/*loop Unrolling */
|
in4 = *pSrc++;
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
/* C = A * scale */
|
||||||
|
/* Scale the inputs and then store the results in the temporary variables. */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8));
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8));
|
||||||
while(blkCnt > 0u)
|
out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8));
|
||||||
{
|
out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8));
|
||||||
/* Reading 4 inputs from memory */
|
|
||||||
in1 = *pSrc++;
|
/* Packing the individual outputs into 32bit and storing in
|
||||||
in2 = *pSrc++;
|
* destination buffer in single write */
|
||||||
in3 = *pSrc++;
|
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
|
||||||
in4 = *pSrc++;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* C = A * scale */
|
blkCnt--;
|
||||||
/* Scale the inputs and then store the results in the temporary variables. */
|
}
|
||||||
out1 = (q7_t) (__SSAT(((in1) * scaleFract) >> kShift, 8));
|
|
||||||
out2 = (q7_t) (__SSAT(((in2) * scaleFract) >> kShift, 8));
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
out3 = (q7_t) (__SSAT(((in3) * scaleFract) >> kShift, 8));
|
** No loop unrolling is used. */
|
||||||
out4 = (q7_t) (__SSAT(((in4) * scaleFract) >> kShift, 8));
|
blkCnt = blockSize % 0x4U;
|
||||||
|
|
||||||
/* Packing the individual outputs into 32bit and storing in
|
while (blkCnt > 0U)
|
||||||
* destination buffer in single write */
|
{
|
||||||
*__SIMD32(pDst)++ = __PACKq7(out1, out2, out3, out4);
|
/* C = A * scale */
|
||||||
|
/* Scale the input and then store the result in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = (q7_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 8));
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A * scale */
|
/* Initialize blkCnt with number of samples */
|
||||||
/* Scale the input and then store the result in the destination buffer. */
|
blkCnt = blockSize;
|
||||||
*pDst++ = (q7_t) (__SSAT(((*pSrc++) * scaleFract) >> kShift, 8));
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Decrement the loop counter */
|
{
|
||||||
blkCnt--;
|
/* C = A * scale */
|
||||||
}
|
/* Scale the input and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (q7_t) (__SSAT((((q15_t) * pSrc++ * scaleFract) >> kShift), 8));
|
||||||
#else
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Run the below code for Cortex-M0 */
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A * scale */
|
/**
|
||||||
/* Scale the input and then store the result in the destination buffer. */
|
* @} end of scale group
|
||||||
*pDst++ = (q7_t) (__SSAT((((q15_t) * pSrc++ * scaleFract) >> kShift), 8));
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of scale group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,248 +1,236 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_shift_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Shifts the elements of a Q15 vector by a specified number of bits
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_shift_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Shifts the elements of a Q15 vector by a specified number of bits.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup shift
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Shifts the elements of a Q15 vector a specified number of bits.
|
||||||
|
* @param[in] *pSrc points to the input vector
|
||||||
/**
|
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in the vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup shift
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Shifts the elements of a Q15 vector a specified number of bits.
|
|
||||||
* @param[in] *pSrc points to the input vector
|
void arm_shift_q15(
|
||||||
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
q15_t * pSrc,
|
||||||
* @param[out] *pDst points to the output vector
|
int8_t shiftBits,
|
||||||
* @param[in] blockSize number of samples in the vector
|
q15_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
uint8_t sign; /* Sign of shiftBits */
|
||||||
* The function uses saturating arithmetic.
|
|
||||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
void arm_shift_q15(
|
|
||||||
q15_t * pSrc,
|
q15_t in1, in2; /* Temporary variables */
|
||||||
int8_t shiftBits,
|
|
||||||
q15_t * pDst,
|
|
||||||
uint32_t blockSize)
|
/*loop Unrolling */
|
||||||
{
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blkCnt; /* loop counter */
|
|
||||||
uint8_t sign; /* Sign of shiftBits */
|
/* Getting the sign of shiftBits */
|
||||||
|
sign = (shiftBits & 0x80);
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
|
||||||
|
/* If the shift value is positive (sign bit clear) then do a saturating left shift, else do a right shift */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
if (sign == 0U)
|
||||||
|
{
|
||||||
q15_t in1, in2; /* Temporary variables */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
/*loop Unrolling */
|
{
|
||||||
blkCnt = blockSize >> 2u;
|
/* Read 2 inputs */
|
||||||
|
in1 = *pSrc++;
|
||||||
/* Getting the sign of shiftBits */
|
in2 = *pSrc++;
|
||||||
sign = (shiftBits & 0x80);
|
/* C = A << shiftBits */
|
||||||
|
/* Shift the inputs and then store the results in the destination buffer. */
|
||||||
/* If the shift value is positive (sign bit clear) then do a saturating left shift, else do a right shift */
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
if(sign == 0u)
|
|
||||||
{
|
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
__SSAT((in2 << shiftBits), 16), 16);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
#else
|
||||||
{
|
|
||||||
/* Read 2 inputs */
|
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
|
||||||
in1 = *pSrc++;
|
__SSAT((in1 << shiftBits), 16), 16);
|
||||||
in2 = *pSrc++;
|
|
||||||
/* C = A << shiftBits */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
/* Shift the inputs and then store the results in the destination buffer. */
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
in1 = *pSrc++;
|
||||||
|
in2 = *pSrc++;
|
||||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
|
|
||||||
__SSAT((in2 << shiftBits), 16), 16);
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
|
|
||||||
#else
|
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
|
||||||
|
__SSAT((in2 << shiftBits), 16), 16);
|
||||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
|
|
||||||
__SSAT((in1 << shiftBits), 16), 16);
|
#else
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
|
||||||
|
__SSAT((in1 << shiftBits), 16), 16);
|
||||||
in1 = *pSrc++;
|
|
||||||
in2 = *pSrc++;
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in1 << shiftBits), 16),
|
}
|
||||||
__SSAT((in2 << shiftBits), 16), 16);
|
|
||||||
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
#else
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
*__SIMD32(pDst)++ = __PKHBT(__SSAT((in2 << shiftBits), 16),
|
|
||||||
__SSAT((in1 << shiftBits), 16), 16);
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
/* C = A << shiftBits */
|
||||||
|
/* Shift and then store the results in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = __SSAT((*pSrc++ << shiftBits), 16);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
}
|
||||||
blkCnt = blockSize % 0x4u;
|
else
|
||||||
|
{
|
||||||
while(blkCnt > 0u)
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
{
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
/* C = A << shiftBits */
|
while (blkCnt > 0U)
|
||||||
/* Shift and then store the results in the destination buffer. */
|
{
|
||||||
*pDst++ = __SSAT((*pSrc++ << shiftBits), 16);
|
/* Read 2 inputs */
|
||||||
|
in1 = *pSrc++;
|
||||||
/* Decrement the loop counter */
|
in2 = *pSrc++;
|
||||||
blkCnt--;
|
|
||||||
}
|
/* C = A >> shiftBits */
|
||||||
}
|
/* Shift the inputs and then store the results in the destination buffer. */
|
||||||
else
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
{
|
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
(in2 >> -shiftBits), 16);
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
#else
|
||||||
/* Read 2 inputs */
|
|
||||||
in1 = *pSrc++;
|
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
|
||||||
in2 = *pSrc++;
|
(in1 >> -shiftBits), 16);
|
||||||
|
|
||||||
/* C = A >> shiftBits */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
/* Shift the inputs and then store the results in the destination buffer. */
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
in1 = *pSrc++;
|
||||||
|
in2 = *pSrc++;
|
||||||
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
|
|
||||||
(in2 >> -shiftBits), 16);
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
|
|
||||||
#else
|
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
|
||||||
|
(in2 >> -shiftBits), 16);
|
||||||
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
|
|
||||||
(in1 >> -shiftBits), 16);
|
#else
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
|
||||||
|
(in1 >> -shiftBits), 16);
|
||||||
in1 = *pSrc++;
|
|
||||||
in2 = *pSrc++;
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
*__SIMD32(pDst)++ = __PKHBT((in1 >> -shiftBits),
|
}
|
||||||
(in2 >> -shiftBits), 16);
|
|
||||||
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
#else
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
*__SIMD32(pDst)++ = __PKHBT((in2 >> -shiftBits),
|
|
||||||
(in1 >> -shiftBits), 16);
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
/* C = A >> shiftBits */
|
||||||
|
/* Shift the inputs and then store the results in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
}
|
||||||
blkCnt = blockSize % 0x4u;
|
|
||||||
|
#else
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/* Run the below code for Cortex-M0 */
|
||||||
/* C = A >> shiftBits */
|
|
||||||
/* Shift the inputs and then store the results in the destination buffer. */
|
/* Getting the sign of shiftBits */
|
||||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
sign = (shiftBits & 0x80);
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* If the shift value is positive (sign bit clear) then do a saturating left shift, else do a right shift */
|
||||||
blkCnt--;
|
if (sign == 0U)
|
||||||
}
|
{
|
||||||
}
|
/* Initialize blkCnt with number of samples */
|
||||||
|
blkCnt = blockSize;
|
||||||
#else
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Run the below code for Cortex-M0 */
|
{
|
||||||
|
/* C = A << shiftBits */
|
||||||
/* Getting the sign of shiftBits */
|
/* Shift and then store the results in the destination buffer. */
|
||||||
sign = (shiftBits & 0x80);
|
*pDst++ = __SSAT(((q31_t) * pSrc++ << shiftBits), 16);
|
||||||
|
|
||||||
/* If the shift value is positive (sign bit clear) then do a saturating left shift, else do a right shift */
|
/* Decrement the loop counter */
|
||||||
if(sign == 0u)
|
blkCnt--;
|
||||||
{
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
}
|
||||||
blkCnt = blockSize;
|
else
|
||||||
|
{
|
||||||
while(blkCnt > 0u)
|
/* Initialize blkCnt with number of samples */
|
||||||
{
|
blkCnt = blockSize;
|
||||||
/* C = A << shiftBits */
|
|
||||||
/* Shift and then store the results in the destination buffer. */
|
while (blkCnt > 0U)
|
||||||
*pDst++ = __SSAT(((q31_t) * pSrc++ << shiftBits), 16);
|
{
|
||||||
|
/* C = A >> shiftBits */
|
||||||
/* Decrement the loop counter */
|
/* Shift the inputs and then store the results in the destination buffer. */
|
||||||
blkCnt--;
|
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||||
}
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
else
|
blkCnt--;
|
||||||
{
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
}
|
||||||
blkCnt = blockSize;
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
}
|
||||||
/* C = A >> shiftBits */
|
|
||||||
/* Shift the inputs and then store the results in the destination buffer. */
|
/**
|
||||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
* @} end of shift group
|
||||||
|
*/
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of shift group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,203 +1,191 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_shift_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Shifts the elements of a Q31 vector by a specified number of bits
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_shift_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Shifts the elements of a Q31 vector by a specified number of bits.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @defgroup shift Vector Shift
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
*
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* Shifts the elements of a fixed-point vector by a specified number of bits.
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* There are separate functions for Q7, Q15, and Q31 data types.
|
||||||
* -------------------------------------------------------------------- */
|
* The underlying algorithm used is:
|
||||||
|
*
|
||||||
#include "arm_math.h"
|
* <pre>
|
||||||
|
* pDst[n] = pSrc[n] << shift, 0 <= n < blockSize.
|
||||||
/**
|
* </pre>
|
||||||
* @ingroup groupMath
|
*
|
||||||
*/
|
* If <code>shift</code> is positive then the elements of the vector are shifted to the left.
|
||||||
/**
|
* If <code>shift</code> is negative then the elements of the vector are shifted to the right.
|
||||||
* @defgroup shift Vector Shift
|
*
|
||||||
*
|
* The functions support in-place computation allowing the source and destination
|
||||||
* Shifts the elements of a fixed-point vector by a specified number of bits.
|
* pointers to reference the same memory buffer.
|
||||||
* There are separate functions for Q7, Q15, and Q31 data types.
|
*/
|
||||||
* The underlying algorithm used is:
|
|
||||||
*
|
/**
|
||||||
* <pre>
|
* @addtogroup shift
|
||||||
* pDst[n] = pSrc[n] << shift, 0 <= n < blockSize.
|
* @{
|
||||||
* </pre>
|
*/
|
||||||
*
|
|
||||||
* If <code>shift</code> is positive then the elements of the vector are shifted to the left.
|
/**
|
||||||
* If <code>shift</code> is negative then the elements of the vector are shifted to the right.
|
* @brief Shifts the elements of a Q31 vector a specified number of bits.
|
||||||
*
|
* @param[in] *pSrc points to the input vector
|
||||||
* The functions support in-place computation allowing the source and destination
|
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||||
* pointers to reference the same memory buffer.
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in the vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup shift
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The function uses saturating arithmetic.
|
||||||
/**
|
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
|
||||||
* @brief Shifts the elements of a Q31 vector a specified number of bits.
|
*/
|
||||||
* @param[in] *pSrc points to the input vector
|
|
||||||
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
void arm_shift_q31(
|
||||||
* @param[out] *pDst points to the output vector
|
q31_t * pSrc,
|
||||||
* @param[in] blockSize number of samples in the vector
|
int8_t shiftBits,
|
||||||
* @return none.
|
q31_t * pDst,
|
||||||
*
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
|
||||||
* The function uses saturating arithmetic.
|
|
||||||
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
q31_t in1, in2, in3, in4; /* Temporary input variables */
|
||||||
void arm_shift_q31(
|
q31_t out1, out2, out3, out4; /* Temporary output variables */
|
||||||
q31_t * pSrc,
|
|
||||||
int8_t shiftBits,
|
/*loop Unrolling */
|
||||||
q31_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
|
||||||
uint32_t blkCnt; /* loop counter */
|
if (sign == 0U)
|
||||||
uint8_t sign = (shiftBits & 0x80); /* Sign of shiftBits */
|
{
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
q31_t in1, in2, in3, in4; /* Temporary input variables */
|
{
|
||||||
q31_t out1, out2, out3, out4; /* Temporary output variables */
|
/* C = A << shiftBits */
|
||||||
|
/* Shift the input and then store the results in the destination buffer. */
|
||||||
/*loop Unrolling */
|
in1 = *pSrc;
|
||||||
blkCnt = blockSize >> 2u;
|
in2 = *(pSrc + 1);
|
||||||
|
out1 = in1 << shiftBits;
|
||||||
|
in3 = *(pSrc + 2);
|
||||||
if(sign == 0u)
|
out2 = in2 << shiftBits;
|
||||||
{
|
in4 = *(pSrc + 3);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
if (in1 != (out1 >> shiftBits))
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
out1 = 0x7FFFFFFF ^ (in1 >> 31);
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
if (in2 != (out2 >> shiftBits))
|
||||||
/* C = A << shiftBits */
|
out2 = 0x7FFFFFFF ^ (in2 >> 31);
|
||||||
/* Shift the input and then store the results in the destination buffer. */
|
|
||||||
in1 = *pSrc;
|
*pDst = out1;
|
||||||
in2 = *(pSrc + 1);
|
out3 = in3 << shiftBits;
|
||||||
out1 = in1 << shiftBits;
|
*(pDst + 1) = out2;
|
||||||
in3 = *(pSrc + 2);
|
out4 = in4 << shiftBits;
|
||||||
out2 = in2 << shiftBits;
|
|
||||||
in4 = *(pSrc + 3);
|
if (in3 != (out3 >> shiftBits))
|
||||||
if(in1 != (out1 >> shiftBits))
|
out3 = 0x7FFFFFFF ^ (in3 >> 31);
|
||||||
out1 = 0x7FFFFFFF ^ (in1 >> 31);
|
|
||||||
|
if (in4 != (out4 >> shiftBits))
|
||||||
if(in2 != (out2 >> shiftBits))
|
out4 = 0x7FFFFFFF ^ (in4 >> 31);
|
||||||
out2 = 0x7FFFFFFF ^ (in2 >> 31);
|
|
||||||
|
*(pDst + 2) = out3;
|
||||||
*pDst = out1;
|
*(pDst + 3) = out4;
|
||||||
out3 = in3 << shiftBits;
|
|
||||||
*(pDst + 1) = out2;
|
/* Update source and destination pointers to process the next samples */
|
||||||
out4 = in4 << shiftBits;
|
pSrc += 4U;
|
||||||
|
pDst += 4U;
|
||||||
if(in3 != (out3 >> shiftBits))
|
|
||||||
out3 = 0x7FFFFFFF ^ (in3 >> 31);
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
if(in4 != (out4 >> shiftBits))
|
}
|
||||||
out4 = 0x7FFFFFFF ^ (in4 >> 31);
|
}
|
||||||
|
else
|
||||||
*(pDst + 2) = out3;
|
{
|
||||||
*(pDst + 3) = out4;
|
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
/* Update source and destination pointers to process the next samples */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
pSrc += 4u;
|
while (blkCnt > 0U)
|
||||||
pDst += 4u;
|
{
|
||||||
|
/* C = A >> shiftBits */
|
||||||
/* Decrement the loop counter */
|
/* Shift the input and then store the results in the destination buffer. */
|
||||||
blkCnt--;
|
in1 = *pSrc;
|
||||||
}
|
in2 = *(pSrc + 1);
|
||||||
}
|
in3 = *(pSrc + 2);
|
||||||
else
|
in4 = *(pSrc + 3);
|
||||||
{
|
|
||||||
|
*pDst = (in1 >> -shiftBits);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*(pDst + 1) = (in2 >> -shiftBits);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
*(pDst + 2) = (in3 >> -shiftBits);
|
||||||
while(blkCnt > 0u)
|
*(pDst + 3) = (in4 >> -shiftBits);
|
||||||
{
|
|
||||||
/* C = A >> shiftBits */
|
|
||||||
/* Shift the input and then store the results in the destination buffer. */
|
pSrc += 4U;
|
||||||
in1 = *pSrc;
|
pDst += 4U;
|
||||||
in2 = *(pSrc + 1);
|
|
||||||
in3 = *(pSrc + 2);
|
blkCnt--;
|
||||||
in4 = *(pSrc + 3);
|
}
|
||||||
|
|
||||||
*pDst = (in1 >> -shiftBits);
|
}
|
||||||
*(pDst + 1) = (in2 >> -shiftBits);
|
|
||||||
*(pDst + 2) = (in3 >> -shiftBits);
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
*(pDst + 3) = (in4 >> -shiftBits);
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
|
|
||||||
pSrc += 4u;
|
#else
|
||||||
pDst += 4u;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
/* Initialize blkCnt with number of samples */
|
||||||
}
|
blkCnt = blockSize;
|
||||||
|
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
#else
|
{
|
||||||
|
/* C = A (>> or <<) shiftBits */
|
||||||
/* Run the below code for Cortex-M0 */
|
/* Shift the input and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (sign == 0U) ? clip_q63_to_q31((q63_t) * pSrc++ << shiftBits) :
|
||||||
|
(*pSrc++ >> -shiftBits);
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
}
|
||||||
|
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A (>> or <<) shiftBits */
|
/**
|
||||||
/* Shift the input and then store the result in the destination buffer. */
|
* @} end of shift group
|
||||||
*pDst++ = (sign == 0u) ? clip_q63_to_q31((q63_t) * pSrc++ << shiftBits) :
|
*/
|
||||||
(*pSrc++ >> -shiftBits);
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of shift group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,220 +1,208 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_shift_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Processing function for the Q7 Shifting
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_shift_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Processing function for the Q7 Shifting
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup shift
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Shifts the elements of a Q7 vector a specified number of bits.
|
||||||
/**
|
* @param[in] *pSrc points to the input vector
|
||||||
* @ingroup groupMath
|
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
||||||
*/
|
* @param[out] *pDst points to the output vector
|
||||||
|
* @param[in] blockSize number of samples in the vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup shift
|
*
|
||||||
* @{
|
* \par Conditions for optimum performance
|
||||||
*/
|
* Input and output buffers should be aligned by 32-bit
|
||||||
|
*
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @brief Shifts the elements of a Q7 vector a specified number of bits.
|
* \par
|
||||||
* @param[in] *pSrc points to the input vector
|
* The function uses saturating arithmetic.
|
||||||
* @param[in] shiftBits number of bits to shift. A positive value shifts left; a negative value shifts right.
|
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||||
* @param[out] *pDst points to the output vector
|
*/
|
||||||
* @param[in] blockSize number of samples in the vector
|
|
||||||
* @return none.
|
void arm_shift_q7(
|
||||||
*
|
q7_t * pSrc,
|
||||||
* \par Conditions for optimum performance
|
int8_t shiftBits,
|
||||||
* Input and output buffers should be aligned by 32-bit
|
q7_t * pDst,
|
||||||
*
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
uint8_t sign; /* Sign of shiftBits */
|
||||||
* The function uses saturating arithmetic.
|
|
||||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
void arm_shift_q7(
|
q7_t in1; /* Input value1 */
|
||||||
q7_t * pSrc,
|
q7_t in2; /* Input value2 */
|
||||||
int8_t shiftBits,
|
q7_t in3; /* Input value3 */
|
||||||
q7_t * pDst,
|
q7_t in4; /* Input value4 */
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
|
||||||
uint32_t blkCnt; /* loop counter */
|
/*loop Unrolling */
|
||||||
uint8_t sign; /* Sign of shiftBits */
|
blkCnt = blockSize >> 2U;
|
||||||
|
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* Getting the sign of shiftBits */
|
||||||
|
sign = (shiftBits & 0x80);
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
|
||||||
q7_t in1; /* Input value1 */
|
/* If the shift value is positive then do left shift else right shift */
|
||||||
q7_t in2; /* Input value2 */
|
if (sign == 0U)
|
||||||
q7_t in3; /* Input value3 */
|
{
|
||||||
q7_t in4; /* Input value4 */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
/*loop Unrolling */
|
{
|
||||||
blkCnt = blockSize >> 2u;
|
/* C = A << shiftBits */
|
||||||
|
/* Read 4 inputs */
|
||||||
/* Getting the sign of shiftBits */
|
in1 = *pSrc;
|
||||||
sign = (shiftBits & 0x80);
|
in2 = *(pSrc + 1);
|
||||||
|
in3 = *(pSrc + 2);
|
||||||
/* If the shift value is positive then do left shift else right shift */
|
in4 = *(pSrc + 3);
|
||||||
if(sign == 0u)
|
|
||||||
{
|
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
__SSAT((in2 << shiftBits), 8),
|
||||||
while(blkCnt > 0u)
|
__SSAT((in3 << shiftBits), 8),
|
||||||
{
|
__SSAT((in4 << shiftBits), 8));
|
||||||
/* C = A << shiftBits */
|
/* Update source pointer to process next samples */
|
||||||
/* Read 4 inputs */
|
pSrc += 4U;
|
||||||
in1 = *pSrc;
|
|
||||||
in2 = *(pSrc + 1);
|
/* Decrement the loop counter */
|
||||||
in3 = *(pSrc + 2);
|
blkCnt--;
|
||||||
in4 = *(pSrc + 3);
|
}
|
||||||
|
|
||||||
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
*__SIMD32(pDst)++ = __PACKq7(__SSAT((in1 << shiftBits), 8),
|
** No loop unrolling is used. */
|
||||||
__SSAT((in2 << shiftBits), 8),
|
blkCnt = blockSize % 0x4U;
|
||||||
__SSAT((in3 << shiftBits), 8),
|
|
||||||
__SSAT((in4 << shiftBits), 8));
|
while (blkCnt > 0U)
|
||||||
/* Update source pointer to process next samples */
|
{
|
||||||
pSrc += 4u;
|
/* C = A << shiftBits */
|
||||||
|
/* Shift the input and then store the result in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
}
|
||||||
blkCnt = blockSize % 0x4u;
|
else
|
||||||
|
{
|
||||||
while(blkCnt > 0u)
|
shiftBits = -shiftBits;
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
/* C = A << shiftBits */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
/* Shift the input and then store the result in the destination buffer. */
|
while (blkCnt > 0U)
|
||||||
*pDst++ = (q7_t) __SSAT((*pSrc++ << shiftBits), 8);
|
{
|
||||||
|
/* C = A >> shiftBits */
|
||||||
/* Decrement the loop counter */
|
/* Read 4 inputs */
|
||||||
blkCnt--;
|
in1 = *pSrc;
|
||||||
}
|
in2 = *(pSrc + 1);
|
||||||
}
|
in3 = *(pSrc + 2);
|
||||||
else
|
in4 = *(pSrc + 3);
|
||||||
{
|
|
||||||
shiftBits = -shiftBits;
|
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
(in3 >> shiftBits), (in4 >> shiftBits));
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
|
||||||
/* C = A >> shiftBits */
|
pSrc += 4U;
|
||||||
/* Read 4 inputs */
|
|
||||||
in1 = *pSrc;
|
/* Decrement the loop counter */
|
||||||
in2 = *(pSrc + 1);
|
blkCnt--;
|
||||||
in3 = *(pSrc + 2);
|
}
|
||||||
in4 = *(pSrc + 3);
|
|
||||||
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* Store the Shifted result in the destination buffer in single cycle by packing the outputs */
|
** No loop unrolling is used. */
|
||||||
*__SIMD32(pDst)++ = __PACKq7((in1 >> shiftBits), (in2 >> shiftBits),
|
blkCnt = blockSize % 0x4U;
|
||||||
(in3 >> shiftBits), (in4 >> shiftBits));
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
pSrc += 4u;
|
/* C = A >> shiftBits */
|
||||||
|
/* Shift the input and then store the result in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
in1 = *pSrc++;
|
||||||
blkCnt--;
|
*pDst++ = (in1 >> shiftBits);
|
||||||
}
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
blkCnt--;
|
||||||
** No loop unrolling is used. */
|
}
|
||||||
blkCnt = blockSize % 0x4u;
|
}
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
#else
|
||||||
{
|
|
||||||
/* C = A >> shiftBits */
|
/* Run the below code for Cortex-M0 */
|
||||||
/* Shift the input and then store the result in the destination buffer. */
|
|
||||||
in1 = *pSrc++;
|
/* Getting the sign of shiftBits */
|
||||||
*pDst++ = (in1 >> shiftBits);
|
sign = (shiftBits & 0x80);
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* If the shift value is positive then do left shift else right shift */
|
||||||
blkCnt--;
|
if (sign == 0U)
|
||||||
}
|
{
|
||||||
}
|
/* Initialize blkCnt with number of samples */
|
||||||
|
blkCnt = blockSize;
|
||||||
#else
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Run the below code for Cortex-M0 */
|
{
|
||||||
|
/* C = A << shiftBits */
|
||||||
/* Getting the sign of shiftBits */
|
/* Shift the input and then store the result in the destination buffer. */
|
||||||
sign = (shiftBits & 0x80);
|
*pDst++ = (q7_t) __SSAT(((q15_t) * pSrc++ << shiftBits), 8);
|
||||||
|
|
||||||
/* If the shift value is positive then do left shift else right shift */
|
/* Decrement the loop counter */
|
||||||
if(sign == 0u)
|
blkCnt--;
|
||||||
{
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
}
|
||||||
blkCnt = blockSize;
|
else
|
||||||
|
{
|
||||||
while(blkCnt > 0u)
|
/* Initialize blkCnt with number of samples */
|
||||||
{
|
blkCnt = blockSize;
|
||||||
/* C = A << shiftBits */
|
|
||||||
/* Shift the input and then store the result in the destination buffer. */
|
while (blkCnt > 0U)
|
||||||
*pDst++ = (q7_t) __SSAT(((q15_t) * pSrc++ << shiftBits), 8);
|
{
|
||||||
|
/* C = A >> shiftBits */
|
||||||
/* Decrement the loop counter */
|
/* Shift the input and then store the result in the destination buffer. */
|
||||||
blkCnt--;
|
*pDst++ = (*pSrc++ >> -shiftBits);
|
||||||
}
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
else
|
blkCnt--;
|
||||||
{
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
}
|
||||||
blkCnt = blockSize;
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A >> shiftBits */
|
/**
|
||||||
/* Shift the input and then store the result in the destination buffer. */
|
* @} end of shift group
|
||||||
*pDst++ = (*pSrc++ >> -shiftBits);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of shift group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,150 +1,138 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sub_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point vector subtraction.
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sub_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point vector subtraction.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup BasicSub Vector Subtraction
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Element-by-element subtraction of two vectors.
|
||||||
* ---------------------------------------------------------------------------- */
|
*
|
||||||
|
* <pre>
|
||||||
#include "arm_math.h"
|
* pDst[n] = pSrcA[n] - pSrcB[n], 0 <= n < blockSize.
|
||||||
|
* </pre>
|
||||||
/**
|
*
|
||||||
* @ingroup groupMath
|
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @defgroup BasicSub Vector Subtraction
|
* @addtogroup BasicSub
|
||||||
*
|
* @{
|
||||||
* Element-by-element subtraction of two vectors.
|
*/
|
||||||
*
|
|
||||||
* <pre>
|
|
||||||
* pDst[n] = pSrcA[n] - pSrcB[n], 0 <= n < blockSize.
|
/**
|
||||||
* </pre>
|
* @brief Floating-point vector subtraction.
|
||||||
*
|
* @param[in] *pSrcA points to the first input vector
|
||||||
* There are separate functions for floating-point, Q7, Q15, and Q31 data types.
|
* @param[in] *pSrcB points to the second input vector
|
||||||
*/
|
* @param[out] *pDst points to the output vector
|
||||||
|
* @param[in] blockSize number of samples in each vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup BasicSub
|
*/
|
||||||
* @{
|
|
||||||
*/
|
void arm_sub_f32(
|
||||||
|
float32_t * pSrcA,
|
||||||
|
float32_t * pSrcB,
|
||||||
/**
|
float32_t * pDst,
|
||||||
* @brief Floating-point vector subtraction.
|
uint32_t blockSize)
|
||||||
* @param[in] *pSrcA points to the first input vector
|
{
|
||||||
* @param[in] *pSrcB points to the second input vector
|
uint32_t blkCnt; /* loop counter */
|
||||||
* @param[out] *pDst points to the output vector
|
|
||||||
* @param[in] blockSize number of samples in each vector
|
#if defined (ARM_MATH_DSP)
|
||||||
* @return none.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
float32_t inA1, inA2, inA3, inA4; /* temporary variables */
|
||||||
void arm_sub_f32(
|
float32_t inB1, inB2, inB3, inB4; /* temporary variables */
|
||||||
float32_t * pSrcA,
|
|
||||||
float32_t * pSrcB,
|
/*loop Unrolling */
|
||||||
float32_t * pDst,
|
blkCnt = blockSize >> 2U;
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blkCnt; /* loop counter */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* C = A - B */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Subtract and then store the results in the destination buffer. */
|
||||||
float32_t inA1, inA2, inA3, inA4; /* temporary variables */
|
/* Read 4 input samples from sourceA and sourceB */
|
||||||
float32_t inB1, inB2, inB3, inB4; /* temporary variables */
|
inA1 = *pSrcA;
|
||||||
|
inB1 = *pSrcB;
|
||||||
/*loop Unrolling */
|
inA2 = *(pSrcA + 1);
|
||||||
blkCnt = blockSize >> 2u;
|
inB2 = *(pSrcB + 1);
|
||||||
|
inA3 = *(pSrcA + 2);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
inB3 = *(pSrcB + 2);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inA4 = *(pSrcA + 3);
|
||||||
while(blkCnt > 0u)
|
inB4 = *(pSrcB + 3);
|
||||||
{
|
|
||||||
/* C = A - B */
|
/* dst = srcA - srcB */
|
||||||
/* Subtract and then store the results in the destination buffer. */
|
/* subtract and store the result */
|
||||||
/* Read 4 input samples from sourceA and sourceB */
|
*pDst = inA1 - inB1;
|
||||||
inA1 = *pSrcA;
|
*(pDst + 1) = inA2 - inB2;
|
||||||
inB1 = *pSrcB;
|
*(pDst + 2) = inA3 - inB3;
|
||||||
inA2 = *(pSrcA + 1);
|
*(pDst + 3) = inA4 - inB4;
|
||||||
inB2 = *(pSrcB + 1);
|
|
||||||
inA3 = *(pSrcA + 2);
|
|
||||||
inB3 = *(pSrcB + 2);
|
/* Update pointers to process next samples */
|
||||||
inA4 = *(pSrcA + 3);
|
pSrcA += 4U;
|
||||||
inB4 = *(pSrcB + 3);
|
pSrcB += 4U;
|
||||||
|
pDst += 4U;
|
||||||
/* dst = srcA - srcB */
|
|
||||||
/* subtract and store the result */
|
/* Decrement the loop counter */
|
||||||
*pDst = inA1 - inB1;
|
blkCnt--;
|
||||||
*(pDst + 1) = inA2 - inB2;
|
}
|
||||||
*(pDst + 2) = inA3 - inB3;
|
|
||||||
*(pDst + 3) = inA4 - inB4;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
/* Update pointers to process next samples */
|
|
||||||
pSrcA += 4u;
|
#else
|
||||||
pSrcB += 4u;
|
|
||||||
pDst += 4u;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* Initialize blkCnt with number of samples */
|
||||||
blkCnt--;
|
blkCnt = blockSize;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = blockSize % 0x4u;
|
{
|
||||||
|
/* C = A - B */
|
||||||
#else
|
/* Subtract and then store the results in the destination buffer. */
|
||||||
|
*pDst++ = (*pSrcA++) - (*pSrcB++);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Initialize blkCnt with number of samples */
|
blkCnt--;
|
||||||
blkCnt = blockSize;
|
}
|
||||||
|
}
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
/**
|
||||||
while(blkCnt > 0u)
|
* @} end of BasicSub group
|
||||||
{
|
*/
|
||||||
/* C = A - B */
|
|
||||||
/* Subtract and then store the results in the destination buffer. */
|
|
||||||
*pDst++ = (*pSrcA++) - (*pSrcB++);
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicSub group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,140 +1,128 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sub_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 vector subtraction
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sub_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 vector subtraction.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicSub
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q15 vector subtraction.
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup BasicSub
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q15 vector subtraction.
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_sub_q15(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q15_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q15_t * pSrcB,
|
||||||
* @param[in] blockSize number of samples in each vector
|
q15_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
|
||||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
void arm_sub_q15(
|
q31_t inA1, inA2;
|
||||||
q15_t * pSrcA,
|
q31_t inB1, inB2;
|
||||||
q15_t * pSrcB,
|
|
||||||
q15_t * pDst,
|
/*loop Unrolling */
|
||||||
uint32_t blockSize)
|
blkCnt = blockSize >> 2U;
|
||||||
{
|
|
||||||
uint32_t blkCnt; /* loop counter */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* C = A - B */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Subtract and then store the results in the destination buffer two samples at a time. */
|
||||||
q31_t inA1, inA2;
|
inA1 = *__SIMD32(pSrcA)++;
|
||||||
q31_t inB1, inB2;
|
inA2 = *__SIMD32(pSrcA)++;
|
||||||
|
inB1 = *__SIMD32(pSrcB)++;
|
||||||
/*loop Unrolling */
|
inB2 = *__SIMD32(pSrcB)++;
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
*__SIMD32(pDst)++ = __QSUB16(inA1, inB1);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*__SIMD32(pDst)++ = __QSUB16(inA2, inB2);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
blkCnt--;
|
||||||
/* C = A - B */
|
}
|
||||||
/* Subtract and then store the results in the destination buffer two samples at a time. */
|
|
||||||
inA1 = *__SIMD32(pSrcA)++;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
inA2 = *__SIMD32(pSrcA)++;
|
** No loop unrolling is used. */
|
||||||
inB1 = *__SIMD32(pSrcB)++;
|
blkCnt = blockSize % 0x4U;
|
||||||
inB2 = *__SIMD32(pSrcB)++;
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
*__SIMD32(pDst)++ = __QSUB16(inA1, inB1);
|
{
|
||||||
*__SIMD32(pDst)++ = __QSUB16(inA2, inB2);
|
/* C = A - B */
|
||||||
|
/* Subtract and then store the result in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = (q15_t) __QSUB16(*pSrcA++, *pSrcB++);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A - B */
|
/* Initialize blkCnt with number of samples */
|
||||||
/* Subtract and then store the result in the destination buffer. */
|
blkCnt = blockSize;
|
||||||
*pDst++ = (q15_t) __QSUB16(*pSrcA++, *pSrcB++);
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Decrement the loop counter */
|
{
|
||||||
blkCnt--;
|
/* C = A - B */
|
||||||
}
|
/* Subtract and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ - *pSrcB++), 16);
|
||||||
#else
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Run the below code for Cortex-M0 */
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
}
|
||||||
/* C = A - B */
|
|
||||||
/* Subtract and then store the result in the destination buffer. */
|
/**
|
||||||
*pDst++ = (q15_t) __SSAT(((q31_t) * pSrcA++ - *pSrcB++), 16);
|
* @} end of BasicSub group
|
||||||
|
*/
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicSub group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,146 +1,134 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sub_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 vector subtraction
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sub_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 vector subtraction.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicSub
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q31 vector subtraction.
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup BasicSub
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q31 vector subtraction.
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_sub_q31(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q31_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q31_t * pSrcB,
|
||||||
* @param[in] blockSize number of samples in each vector
|
q31_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
|
||||||
* Results outside of the allowable Q31 range [0x80000000 0x7FFFFFFF] will be saturated.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
void arm_sub_q31(
|
q31_t inA1, inA2, inA3, inA4;
|
||||||
q31_t * pSrcA,
|
q31_t inB1, inB2, inB3, inB4;
|
||||||
q31_t * pSrcB,
|
|
||||||
q31_t * pDst,
|
/*loop Unrolling */
|
||||||
uint32_t blockSize)
|
blkCnt = blockSize >> 2U;
|
||||||
{
|
|
||||||
uint32_t blkCnt; /* loop counter */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
/* C = A - B */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* Subtract and then store the results in the destination buffer. */
|
||||||
q31_t inA1, inA2, inA3, inA4;
|
inA1 = *pSrcA++;
|
||||||
q31_t inB1, inB2, inB3, inB4;
|
inA2 = *pSrcA++;
|
||||||
|
inB1 = *pSrcB++;
|
||||||
/*loop Unrolling */
|
inB2 = *pSrcB++;
|
||||||
blkCnt = blockSize >> 2u;
|
|
||||||
|
inA3 = *pSrcA++;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
inA4 = *pSrcA++;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inB3 = *pSrcB++;
|
||||||
while(blkCnt > 0u)
|
inB4 = *pSrcB++;
|
||||||
{
|
|
||||||
/* C = A - B */
|
*pDst++ = __QSUB(inA1, inB1);
|
||||||
/* Subtract and then store the results in the destination buffer. */
|
*pDst++ = __QSUB(inA2, inB2);
|
||||||
inA1 = *pSrcA++;
|
*pDst++ = __QSUB(inA3, inB3);
|
||||||
inA2 = *pSrcA++;
|
*pDst++ = __QSUB(inA4, inB4);
|
||||||
inB1 = *pSrcB++;
|
|
||||||
inB2 = *pSrcB++;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
inA3 = *pSrcA++;
|
}
|
||||||
inA4 = *pSrcA++;
|
|
||||||
inB3 = *pSrcB++;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
inB4 = *pSrcB++;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = blockSize % 0x4U;
|
||||||
*pDst++ = __QSUB(inA1, inB1);
|
|
||||||
*pDst++ = __QSUB(inA2, inB2);
|
while (blkCnt > 0U)
|
||||||
*pDst++ = __QSUB(inA3, inB3);
|
{
|
||||||
*pDst++ = __QSUB(inA4, inB4);
|
/* C = A - B */
|
||||||
|
/* Subtract and then store the result in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A - B */
|
/* Initialize blkCnt with number of samples */
|
||||||
/* Subtract and then store the result in the destination buffer. */
|
blkCnt = blockSize;
|
||||||
*pDst++ = __QSUB(*pSrcA++, *pSrcB++);
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Decrement the loop counter */
|
{
|
||||||
blkCnt--;
|
/* C = A - B */
|
||||||
}
|
/* Subtract and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrcA++ - *pSrcB++);
|
||||||
#else
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Run the below code for Cortex-M0 */
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C = A - B */
|
/**
|
||||||
/* Subtract and then store the result in the destination buffer. */
|
* @} end of BasicSub group
|
||||||
*pDst++ = (q31_t) clip_q63_to_q31((q63_t) * pSrcA++ - *pSrcB++);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicSub group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,131 +1,119 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sub_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q7 vector subtraction
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sub_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q7 vector subtraction.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BasicSub
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q7 vector subtraction.
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] blockSize number of samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup BasicSub
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q7 vector subtraction.
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_sub_q7(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q7_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q7_t * pSrcB,
|
||||||
* @param[in] blockSize number of samples in each vector
|
q7_t * pDst,
|
||||||
* @return none.
|
uint32_t blockSize)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q7 range [0x80 0x7F] will be saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
|
||||||
void arm_sub_q7(
|
/*loop Unrolling */
|
||||||
q7_t * pSrcA,
|
blkCnt = blockSize >> 2U;
|
||||||
q7_t * pSrcB,
|
|
||||||
q7_t * pDst,
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blockSize)
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
{
|
while (blkCnt > 0U)
|
||||||
uint32_t blkCnt; /* loop counter */
|
{
|
||||||
|
/* C = A - B */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* Subtract and then store the results in the destination buffer 4 samples at a time. */
|
||||||
|
*__SIMD32(pDst)++ = __QSUB8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/*loop Unrolling */
|
blkCnt--;
|
||||||
blkCnt = blockSize >> 2u;
|
}
|
||||||
|
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
** No loop unrolling is used. */
|
||||||
while(blkCnt > 0u)
|
blkCnt = blockSize % 0x4U;
|
||||||
{
|
|
||||||
/* C = A - B */
|
while (blkCnt > 0U)
|
||||||
/* Subtract and then store the results in the destination buffer 4 samples at a time. */
|
{
|
||||||
*__SIMD32(pDst)++ = __QSUB8(*__SIMD32(pSrcA)++, *__SIMD32(pSrcB)++);
|
/* C = A - B */
|
||||||
|
/* Subtract and then store the result in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = __SSAT(*pSrcA++ - *pSrcB++, 8);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = blockSize % 0x4u;
|
#else
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Run the below code for Cortex-M0 */
|
||||||
{
|
|
||||||
/* C = A - B */
|
/* Initialize blkCnt with number of samples */
|
||||||
/* Subtract and then store the result in the destination buffer. */
|
blkCnt = blockSize;
|
||||||
*pDst++ = __SSAT(*pSrcA++ - *pSrcB++, 8);
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Decrement the loop counter */
|
{
|
||||||
blkCnt--;
|
/* C = A - B */
|
||||||
}
|
/* Subtract and then store the result in the destination buffer. */
|
||||||
|
*pDst++ = (q7_t) __SSAT((q15_t) * pSrcA++ - *pSrcB++, 8);
|
||||||
#else
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* Run the below code for Cortex-M0 */
|
blkCnt--;
|
||||||
|
}
|
||||||
/* Initialize blkCnt with number of samples */
|
|
||||||
blkCnt = blockSize;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
}
|
||||||
/* C = A - B */
|
|
||||||
/* Subtract and then store the result in the destination buffer. */
|
/**
|
||||||
*pDst++ = (q7_t) __SSAT((q15_t) * pSrcA++ - *pSrcB++, 8);
|
* @} end of BasicSub group
|
||||||
|
*/
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BasicSub group
|
|
||||||
*/
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,156 +1,379 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_const_structs.c
|
||||||
* $Date: 31. July 2014
|
* Description: Constant structs that are initialized for user convenience.
|
||||||
* $Revision: V1.4.4
|
* For example, some can be given as arguments to the arm_cfft_f32() or arm_rfft_f32() functions.
|
||||||
*
|
*
|
||||||
* Project: CMSIS DSP Library
|
* $Date: 27. January 2017
|
||||||
* Title: arm_const_structs.c
|
* $Revision: V.1.5.1
|
||||||
*
|
*
|
||||||
* Description: This file has constant structs that are initialized for
|
* Target Processor: Cortex-M cores
|
||||||
* user convenience. For example, some can be given as
|
* -------------------------------------------------------------------- */
|
||||||
* arguments to the arm_cfft_f32() function.
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_const_structs.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/* Floating-point structs */
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
const arm_cfft_instance_f32 arm_cfft_sR_f32_len16 = {
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
16, twiddleCoef_16, armBitRevIndexTable16, ARMBITREVINDEXTABLE_16_TABLE_LENGTH
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
};
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
const arm_cfft_instance_f32 arm_cfft_sR_f32_len32 = {
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
32, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_32_TABLE_LENGTH
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
};
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* -------------------------------------------------------------------- */
|
const arm_cfft_instance_f32 arm_cfft_sR_f32_len64 = {
|
||||||
|
64, twiddleCoef_64, armBitRevIndexTable64, ARMBITREVINDEXTABLE_64_TABLE_LENGTH
|
||||||
#include "arm_const_structs.h"
|
};
|
||||||
|
|
||||||
//Floating-point structs
|
const arm_cfft_instance_f32 arm_cfft_sR_f32_len128 = {
|
||||||
|
128, twiddleCoef_128, armBitRevIndexTable128, ARMBITREVINDEXTABLE_128_TABLE_LENGTH
|
||||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len16 = {
|
};
|
||||||
16, twiddleCoef_16, armBitRevIndexTable16, ARMBITREVINDEXTABLE__16_TABLE_LENGTH
|
|
||||||
};
|
const arm_cfft_instance_f32 arm_cfft_sR_f32_len256 = {
|
||||||
|
256, twiddleCoef_256, armBitRevIndexTable256, ARMBITREVINDEXTABLE_256_TABLE_LENGTH
|
||||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len32 = {
|
};
|
||||||
32, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE__32_TABLE_LENGTH
|
|
||||||
};
|
const arm_cfft_instance_f32 arm_cfft_sR_f32_len512 = {
|
||||||
|
512, twiddleCoef_512, armBitRevIndexTable512, ARMBITREVINDEXTABLE_512_TABLE_LENGTH
|
||||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len64 = {
|
};
|
||||||
64, twiddleCoef_64, armBitRevIndexTable64, ARMBITREVINDEXTABLE__64_TABLE_LENGTH
|
|
||||||
};
|
const arm_cfft_instance_f32 arm_cfft_sR_f32_len1024 = {
|
||||||
|
1024, twiddleCoef_1024, armBitRevIndexTable1024, ARMBITREVINDEXTABLE_1024_TABLE_LENGTH
|
||||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len128 = {
|
};
|
||||||
128, twiddleCoef_128, armBitRevIndexTable128, ARMBITREVINDEXTABLE_128_TABLE_LENGTH
|
|
||||||
};
|
const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048 = {
|
||||||
|
2048, twiddleCoef_2048, armBitRevIndexTable2048, ARMBITREVINDEXTABLE_2048_TABLE_LENGTH
|
||||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len256 = {
|
};
|
||||||
256, twiddleCoef_256, armBitRevIndexTable256, ARMBITREVINDEXTABLE_256_TABLE_LENGTH
|
|
||||||
};
|
const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096 = {
|
||||||
|
4096, twiddleCoef_4096, armBitRevIndexTable4096, ARMBITREVINDEXTABLE_4096_TABLE_LENGTH
|
||||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len512 = {
|
};
|
||||||
512, twiddleCoef_512, armBitRevIndexTable512, ARMBITREVINDEXTABLE_512_TABLE_LENGTH
|
|
||||||
};
|
/* Fixed-point structs */
|
||||||
|
const arm_cfft_instance_q31 arm_cfft_sR_q31_len16 = {
|
||||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len1024 = {
|
16, twiddleCoef_16_q31, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH
|
||||||
1024, twiddleCoef_1024, armBitRevIndexTable1024, ARMBITREVINDEXTABLE1024_TABLE_LENGTH
|
};
|
||||||
};
|
|
||||||
|
const arm_cfft_instance_q31 arm_cfft_sR_q31_len32 = {
|
||||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len2048 = {
|
32, twiddleCoef_32_q31, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH
|
||||||
2048, twiddleCoef_2048, armBitRevIndexTable2048, ARMBITREVINDEXTABLE2048_TABLE_LENGTH
|
};
|
||||||
};
|
|
||||||
|
const arm_cfft_instance_q31 arm_cfft_sR_q31_len64 = {
|
||||||
const arm_cfft_instance_f32 arm_cfft_sR_f32_len4096 = {
|
64, twiddleCoef_64_q31, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH
|
||||||
4096, twiddleCoef_4096, armBitRevIndexTable4096, ARMBITREVINDEXTABLE4096_TABLE_LENGTH
|
};
|
||||||
};
|
|
||||||
|
const arm_cfft_instance_q31 arm_cfft_sR_q31_len128 = {
|
||||||
//Fixed-point structs
|
128, twiddleCoef_128_q31, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH
|
||||||
|
};
|
||||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len16 = {
|
|
||||||
16, twiddleCoef_16_q31, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED___16_TABLE_LENGTH
|
const arm_cfft_instance_q31 arm_cfft_sR_q31_len256 = {
|
||||||
};
|
256, twiddleCoef_256_q31, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH
|
||||||
|
};
|
||||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len32 = {
|
|
||||||
32, twiddleCoef_32_q31, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED___32_TABLE_LENGTH
|
const arm_cfft_instance_q31 arm_cfft_sR_q31_len512 = {
|
||||||
};
|
512, twiddleCoef_512_q31, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH
|
||||||
|
};
|
||||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len64 = {
|
|
||||||
64, twiddleCoef_64_q31, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED___64_TABLE_LENGTH
|
const arm_cfft_instance_q31 arm_cfft_sR_q31_len1024 = {
|
||||||
};
|
1024, twiddleCoef_1024_q31, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
|
||||||
|
};
|
||||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len128 = {
|
|
||||||
128, twiddleCoef_128_q31, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED__128_TABLE_LENGTH
|
const arm_cfft_instance_q31 arm_cfft_sR_q31_len2048 = {
|
||||||
};
|
2048, twiddleCoef_2048_q31, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
|
||||||
|
};
|
||||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len256 = {
|
|
||||||
256, twiddleCoef_256_q31, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED__256_TABLE_LENGTH
|
const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096 = {
|
||||||
};
|
4096, twiddleCoef_4096_q31, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
|
||||||
|
};
|
||||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len512 = {
|
|
||||||
512, twiddleCoef_512_q31, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED__512_TABLE_LENGTH
|
const arm_cfft_instance_q15 arm_cfft_sR_q15_len16 = {
|
||||||
};
|
16, twiddleCoef_16_q15, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED_16_TABLE_LENGTH
|
||||||
|
};
|
||||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len1024 = {
|
|
||||||
1024, twiddleCoef_1024_q31, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
|
const arm_cfft_instance_q15 arm_cfft_sR_q15_len32 = {
|
||||||
};
|
32, twiddleCoef_32_q15, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED_32_TABLE_LENGTH
|
||||||
|
};
|
||||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len2048 = {
|
|
||||||
2048, twiddleCoef_2048_q31, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
|
const arm_cfft_instance_q15 arm_cfft_sR_q15_len64 = {
|
||||||
};
|
64, twiddleCoef_64_q15, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED_64_TABLE_LENGTH
|
||||||
|
};
|
||||||
const arm_cfft_instance_q31 arm_cfft_sR_q31_len4096 = {
|
|
||||||
4096, twiddleCoef_4096_q31, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
|
const arm_cfft_instance_q15 arm_cfft_sR_q15_len128 = {
|
||||||
};
|
128, twiddleCoef_128_q15, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED_128_TABLE_LENGTH
|
||||||
|
};
|
||||||
|
|
||||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len16 = {
|
const arm_cfft_instance_q15 arm_cfft_sR_q15_len256 = {
|
||||||
16, twiddleCoef_16_q15, armBitRevIndexTable_fixed_16, ARMBITREVINDEXTABLE_FIXED___16_TABLE_LENGTH
|
256, twiddleCoef_256_q15, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED_256_TABLE_LENGTH
|
||||||
};
|
};
|
||||||
|
|
||||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len32 = {
|
const arm_cfft_instance_q15 arm_cfft_sR_q15_len512 = {
|
||||||
32, twiddleCoef_32_q15, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED___32_TABLE_LENGTH
|
512, twiddleCoef_512_q15, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED_512_TABLE_LENGTH
|
||||||
};
|
};
|
||||||
|
|
||||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len64 = {
|
const arm_cfft_instance_q15 arm_cfft_sR_q15_len1024 = {
|
||||||
64, twiddleCoef_64_q15, armBitRevIndexTable_fixed_64, ARMBITREVINDEXTABLE_FIXED___64_TABLE_LENGTH
|
1024, twiddleCoef_1024_q15, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
|
||||||
};
|
};
|
||||||
|
|
||||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len128 = {
|
const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048 = {
|
||||||
128, twiddleCoef_128_q15, armBitRevIndexTable_fixed_128, ARMBITREVINDEXTABLE_FIXED__128_TABLE_LENGTH
|
2048, twiddleCoef_2048_q15, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
|
||||||
};
|
};
|
||||||
|
|
||||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len256 = {
|
const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096 = {
|
||||||
256, twiddleCoef_256_q15, armBitRevIndexTable_fixed_256, ARMBITREVINDEXTABLE_FIXED__256_TABLE_LENGTH
|
4096, twiddleCoef_4096_q15, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
|
||||||
};
|
};
|
||||||
|
|
||||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len512 = {
|
/* Structure for real-value inputs */
|
||||||
512, twiddleCoef_512_q15, armBitRevIndexTable_fixed_512, ARMBITREVINDEXTABLE_FIXED__512_TABLE_LENGTH
|
/* Floating-point structs */
|
||||||
};
|
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len32 = {
|
||||||
|
{ 16, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_16_TABLE_LENGTH },
|
||||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len1024 = {
|
32U,
|
||||||
1024, twiddleCoef_1024_q15, armBitRevIndexTable_fixed_1024, ARMBITREVINDEXTABLE_FIXED_1024_TABLE_LENGTH
|
(float32_t *)twiddleCoef_rfft_32
|
||||||
};
|
};
|
||||||
|
|
||||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len2048 = {
|
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len64 = {
|
||||||
2048, twiddleCoef_2048_q15, armBitRevIndexTable_fixed_2048, ARMBITREVINDEXTABLE_FIXED_2048_TABLE_LENGTH
|
{ 32, twiddleCoef_32, armBitRevIndexTable32, ARMBITREVINDEXTABLE_32_TABLE_LENGTH },
|
||||||
};
|
64U,
|
||||||
|
(float32_t *)twiddleCoef_rfft_64
|
||||||
const arm_cfft_instance_q15 arm_cfft_sR_q15_len4096 = {
|
};
|
||||||
4096, twiddleCoef_4096_q15, armBitRevIndexTable_fixed_4096, ARMBITREVINDEXTABLE_FIXED_4096_TABLE_LENGTH
|
|
||||||
};
|
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len128 = {
|
||||||
|
{ 64, twiddleCoef_64, armBitRevIndexTable64, ARMBITREVINDEXTABLE_64_TABLE_LENGTH },
|
||||||
|
128U,
|
||||||
|
(float32_t *)twiddleCoef_rfft_128
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len256 = {
|
||||||
|
{ 128, twiddleCoef_128, armBitRevIndexTable128, ARMBITREVINDEXTABLE_128_TABLE_LENGTH },
|
||||||
|
256U,
|
||||||
|
(float32_t *)twiddleCoef_rfft_256
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len512 = {
|
||||||
|
{ 256, twiddleCoef_256, armBitRevIndexTable256, ARMBITREVINDEXTABLE_256_TABLE_LENGTH },
|
||||||
|
512U,
|
||||||
|
(float32_t *)twiddleCoef_rfft_512
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len1024 = {
|
||||||
|
{ 512, twiddleCoef_512, armBitRevIndexTable512, ARMBITREVINDEXTABLE_512_TABLE_LENGTH },
|
||||||
|
1024U,
|
||||||
|
(float32_t *)twiddleCoef_rfft_1024
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len2048 = {
|
||||||
|
{ 1024, twiddleCoef_1024, armBitRevIndexTable1024, ARMBITREVINDEXTABLE_1024_TABLE_LENGTH },
|
||||||
|
2048U,
|
||||||
|
(float32_t *)twiddleCoef_rfft_2048
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_fast_instance_f32 arm_rfft_fast_sR_f32_len4096 = {
|
||||||
|
{ 2048, twiddleCoef_2048, armBitRevIndexTable2048, ARMBITREVINDEXTABLE_2048_TABLE_LENGTH },
|
||||||
|
4096U,
|
||||||
|
(float32_t *)twiddleCoef_rfft_4096
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Fixed-point structs */
|
||||||
|
/* q31_t */
|
||||||
|
extern const q31_t realCoefAQ31[8192];
|
||||||
|
extern const q31_t realCoefBQ31[8192];
|
||||||
|
|
||||||
|
const arm_rfft_instance_q31 arm_rfft_sR_q31_len32 = {
|
||||||
|
32U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
256U,
|
||||||
|
(q31_t*)realCoefAQ31,
|
||||||
|
(q31_t*)realCoefBQ31,
|
||||||
|
&arm_cfft_sR_q31_len16
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q31 arm_rfft_sR_q31_len64 = {
|
||||||
|
64U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
128U,
|
||||||
|
(q31_t*)realCoefAQ31,
|
||||||
|
(q31_t*)realCoefBQ31,
|
||||||
|
&arm_cfft_sR_q31_len32
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q31 arm_rfft_sR_q31_len128 = {
|
||||||
|
128U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
64U,
|
||||||
|
(q31_t*)realCoefAQ31,
|
||||||
|
(q31_t*)realCoefBQ31,
|
||||||
|
&arm_cfft_sR_q31_len64
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q31 arm_rfft_sR_q31_len256 = {
|
||||||
|
256U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
32U,
|
||||||
|
(q31_t*)realCoefAQ31,
|
||||||
|
(q31_t*)realCoefBQ31,
|
||||||
|
&arm_cfft_sR_q31_len128
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q31 arm_rfft_sR_q31_len512 = {
|
||||||
|
512U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
16U,
|
||||||
|
(q31_t*)realCoefAQ31,
|
||||||
|
(q31_t*)realCoefBQ31,
|
||||||
|
&arm_cfft_sR_q31_len256
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q31 arm_rfft_sR_q31_len1024 = {
|
||||||
|
1024U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
8U,
|
||||||
|
(q31_t*)realCoefAQ31,
|
||||||
|
(q31_t*)realCoefBQ31,
|
||||||
|
&arm_cfft_sR_q31_len512
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q31 arm_rfft_sR_q31_len2048 = {
|
||||||
|
2048U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
4U,
|
||||||
|
(q31_t*)realCoefAQ31,
|
||||||
|
(q31_t*)realCoefBQ31,
|
||||||
|
&arm_cfft_sR_q31_len1024
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q31 arm_rfft_sR_q31_len4096 = {
|
||||||
|
4096U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2U,
|
||||||
|
(q31_t*)realCoefAQ31,
|
||||||
|
(q31_t*)realCoefBQ31,
|
||||||
|
&arm_cfft_sR_q31_len2048
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q31 arm_rfft_sR_q31_len8192 = {
|
||||||
|
8192U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1U,
|
||||||
|
(q31_t*)realCoefAQ31,
|
||||||
|
(q31_t*)realCoefBQ31,
|
||||||
|
&arm_cfft_sR_q31_len4096
|
||||||
|
};
|
||||||
|
|
||||||
|
/* q15_t */
|
||||||
|
extern const q15_t realCoefAQ15[8192];
|
||||||
|
extern const q15_t realCoefBQ15[8192];
|
||||||
|
|
||||||
|
const arm_rfft_instance_q15 arm_rfft_sR_q15_len32 = {
|
||||||
|
32U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
256U,
|
||||||
|
(q15_t*)realCoefAQ15,
|
||||||
|
(q15_t*)realCoefBQ15,
|
||||||
|
&arm_cfft_sR_q15_len16
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q15 arm_rfft_sR_q15_len64 = {
|
||||||
|
64U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
128U,
|
||||||
|
(q15_t*)realCoefAQ15,
|
||||||
|
(q15_t*)realCoefBQ15,
|
||||||
|
&arm_cfft_sR_q15_len32
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q15 arm_rfft_sR_q15_len128 = {
|
||||||
|
128U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
64U,
|
||||||
|
(q15_t*)realCoefAQ15,
|
||||||
|
(q15_t*)realCoefBQ15,
|
||||||
|
&arm_cfft_sR_q15_len64
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q15 arm_rfft_sR_q15_len256 = {
|
||||||
|
256U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
32U,
|
||||||
|
(q15_t*)realCoefAQ15,
|
||||||
|
(q15_t*)realCoefBQ15,
|
||||||
|
&arm_cfft_sR_q15_len128
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q15 arm_rfft_sR_q15_len512 = {
|
||||||
|
512U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
16U,
|
||||||
|
(q15_t*)realCoefAQ15,
|
||||||
|
(q15_t*)realCoefBQ15,
|
||||||
|
&arm_cfft_sR_q15_len256
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q15 arm_rfft_sR_q15_len1024 = {
|
||||||
|
1024U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
8U,
|
||||||
|
(q15_t*)realCoefAQ15,
|
||||||
|
(q15_t*)realCoefBQ15,
|
||||||
|
&arm_cfft_sR_q15_len512
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q15 arm_rfft_sR_q15_len2048 = {
|
||||||
|
2048U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
4U,
|
||||||
|
(q15_t*)realCoefAQ15,
|
||||||
|
(q15_t*)realCoefBQ15,
|
||||||
|
&arm_cfft_sR_q15_len1024
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q15 arm_rfft_sR_q15_len4096 = {
|
||||||
|
4096U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
2U,
|
||||||
|
(q15_t*)realCoefAQ15,
|
||||||
|
(q15_t*)realCoefBQ15,
|
||||||
|
&arm_cfft_sR_q15_len2048
|
||||||
|
};
|
||||||
|
|
||||||
|
const arm_rfft_instance_q15 arm_rfft_sR_q15_len8192 = {
|
||||||
|
8192U,
|
||||||
|
0,
|
||||||
|
1,
|
||||||
|
1U,
|
||||||
|
(q15_t*)realCoefAQ15,
|
||||||
|
(q15_t*)realCoefBQ15,
|
||||||
|
&arm_cfft_sR_q15_len4096
|
||||||
|
};
|
||||||
|
|
|
@ -1,182 +1,171 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_conj_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point complex conjugate
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_conj_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point complex conjugate.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup cmplx_conj Complex Conjugate
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Conjugates the elements of a complex data vector.
|
||||||
* ---------------------------------------------------------------------------- */
|
*
|
||||||
#include "arm_math.h"
|
* The <code>pSrc</code> points to the source data and
|
||||||
|
* <code>pDst</code> points to the where the result should be written.
|
||||||
/**
|
* <code>numSamples</code> specifies the number of complex samples
|
||||||
* @ingroup groupCmplxMath
|
* and the data in each array is stored in an interleaved fashion
|
||||||
*/
|
* (real, imag, real, imag, ...).
|
||||||
|
* Each array has a total of <code>2*numSamples</code> values.
|
||||||
/**
|
* The underlying algorithm is used:
|
||||||
* @defgroup cmplx_conj Complex Conjugate
|
*
|
||||||
*
|
* <pre>
|
||||||
* Conjugates the elements of a complex data vector.
|
* for(n=0; n<numSamples; n++) {
|
||||||
*
|
* pDst[(2*n)+0)] = pSrc[(2*n)+0]; // real part
|
||||||
* The <code>pSrc</code> points to the source data and
|
* pDst[(2*n)+1)] = -pSrc[(2*n)+1]; // imag part
|
||||||
* <code>pDst</code> points to the where the result should be written.
|
* }
|
||||||
* <code>numSamples</code> specifies the number of complex samples
|
* </pre>
|
||||||
* and the data in each array is stored in an interleaved fashion
|
*
|
||||||
* (real, imag, real, imag, ...).
|
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||||
* Each array has a total of <code>2*numSamples</code> values.
|
*/
|
||||||
* The underlying algorithm is used:
|
|
||||||
*
|
/**
|
||||||
* <pre>
|
* @addtogroup cmplx_conj
|
||||||
* for(n=0; n<numSamples; n++) {
|
* @{
|
||||||
* pDst[(2*n)+0)] = pSrc[(2*n)+0]; // real part
|
*/
|
||||||
* pDst[(2*n)+1)] = -pSrc[(2*n)+1]; // imag part
|
|
||||||
* }
|
/**
|
||||||
* </pre>
|
* @brief Floating-point complex conjugate.
|
||||||
*
|
* @param *pSrc points to the input vector
|
||||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
* @param *pDst points to the output vector
|
||||||
*/
|
* @param numSamples number of complex samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*/
|
||||||
* @addtogroup cmplx_conj
|
|
||||||
* @{
|
void arm_cmplx_conj_f32(
|
||||||
*/
|
float32_t * pSrc,
|
||||||
|
float32_t * pDst,
|
||||||
/**
|
uint32_t numSamples)
|
||||||
* @brief Floating-point complex conjugate.
|
{
|
||||||
* @param *pSrc points to the input vector
|
uint32_t blkCnt; /* loop counter */
|
||||||
* @param *pDst points to the output vector
|
|
||||||
* @param numSamples number of complex samples in each vector
|
#if defined (ARM_MATH_DSP)
|
||||||
* @return none.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
float32_t inR1, inR2, inR3, inR4;
|
||||||
void arm_cmplx_conj_f32(
|
float32_t inI1, inI2, inI3, inI4;
|
||||||
float32_t * pSrc,
|
|
||||||
float32_t * pDst,
|
/*loop Unrolling */
|
||||||
uint32_t numSamples)
|
blkCnt = numSamples >> 2U;
|
||||||
{
|
|
||||||
uint32_t blkCnt; /* loop counter */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||||
float32_t inR1, inR2, inR3, inR4;
|
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||||
float32_t inI1, inI2, inI3, inI4;
|
/* read real input samples */
|
||||||
|
inR1 = pSrc[0];
|
||||||
/*loop Unrolling */
|
/* store real samples to destination */
|
||||||
blkCnt = numSamples >> 2u;
|
pDst[0] = inR1;
|
||||||
|
inR2 = pSrc[2];
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
pDst[2] = inR2;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inR3 = pSrc[4];
|
||||||
while(blkCnt > 0u)
|
pDst[4] = inR3;
|
||||||
{
|
inR4 = pSrc[6];
|
||||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
pDst[6] = inR4;
|
||||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
|
||||||
/* read real input samples */
|
/* read imaginary input samples */
|
||||||
inR1 = pSrc[0];
|
inI1 = pSrc[1];
|
||||||
/* store real samples to destination */
|
inI2 = pSrc[3];
|
||||||
pDst[0] = inR1;
|
|
||||||
inR2 = pSrc[2];
|
/* conjugate input */
|
||||||
pDst[2] = inR2;
|
inI1 = -inI1;
|
||||||
inR3 = pSrc[4];
|
|
||||||
pDst[4] = inR3;
|
/* read imaginary input samples */
|
||||||
inR4 = pSrc[6];
|
inI3 = pSrc[5];
|
||||||
pDst[6] = inR4;
|
|
||||||
|
/* conjugate input */
|
||||||
/* read imaginary input samples */
|
inI2 = -inI2;
|
||||||
inI1 = pSrc[1];
|
|
||||||
inI2 = pSrc[3];
|
/* read imaginary input samples */
|
||||||
|
inI4 = pSrc[7];
|
||||||
/* conjugate input */
|
|
||||||
inI1 = -inI1;
|
/* conjugate input */
|
||||||
|
inI3 = -inI3;
|
||||||
/* read imaginary input samples */
|
|
||||||
inI3 = pSrc[5];
|
/* store imaginary samples to destination */
|
||||||
|
pDst[1] = inI1;
|
||||||
/* conjugate input */
|
pDst[3] = inI2;
|
||||||
inI2 = -inI2;
|
|
||||||
|
/* conjugate input */
|
||||||
/* read imaginary input samples */
|
inI4 = -inI4;
|
||||||
inI4 = pSrc[7];
|
|
||||||
|
/* store imaginary samples to destination */
|
||||||
/* conjugate input */
|
pDst[5] = inI3;
|
||||||
inI3 = -inI3;
|
|
||||||
|
/* increment source pointer by 8 to process next sampels */
|
||||||
/* store imaginary samples to destination */
|
pSrc += 8U;
|
||||||
pDst[1] = inI1;
|
|
||||||
pDst[3] = inI2;
|
/* store imaginary sample to destination */
|
||||||
|
pDst[7] = inI4;
|
||||||
/* conjugate input */
|
|
||||||
inI4 = -inI4;
|
/* increment destination pointer by 8 to store next samples */
|
||||||
|
pDst += 8U;
|
||||||
/* store imaginary samples to destination */
|
|
||||||
pDst[5] = inI3;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* increment source pointer by 8 to process next sampels */
|
}
|
||||||
pSrc += 8u;
|
|
||||||
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* store imaginary sample to destination */
|
** No loop unrolling is used. */
|
||||||
pDst[7] = inI4;
|
blkCnt = numSamples % 0x4U;
|
||||||
|
|
||||||
/* increment destination pointer by 8 to store next samples */
|
#else
|
||||||
pDst += 8u;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
/* Decrement the loop counter */
|
blkCnt = numSamples;
|
||||||
blkCnt--;
|
|
||||||
}
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
while (blkCnt > 0U)
|
||||||
** No loop unrolling is used. */
|
{
|
||||||
blkCnt = numSamples % 0x4u;
|
/* realOut + j (imagOut) = realIn + j (-1) imagIn */
|
||||||
|
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||||
#else
|
*pDst++ = *pSrc++;
|
||||||
|
*pDst++ = -*pSrc++;
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
blkCnt = numSamples;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
}
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/**
|
||||||
/* realOut + j (imagOut) = realIn + j (-1) imagIn */
|
* @} end of cmplx_conj group
|
||||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
*/
|
||||||
*pDst++ = *pSrc++;
|
|
||||||
*pDst++ = -*pSrc++;
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_conj group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,161 +1,149 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_conj_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 complex conjugate
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_conj_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 complex conjugate.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup cmplx_conj
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* ---------------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q15 complex conjugate.
|
||||||
|
* @param *pSrc points to the input vector
|
||||||
/**
|
* @param *pDst points to the output vector
|
||||||
* @ingroup groupCmplxMath
|
* @param numSamples number of complex samples in each vector
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @addtogroup cmplx_conj
|
* \par
|
||||||
* @{
|
* The function uses saturating arithmetic.
|
||||||
*/
|
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @brief Q15 complex conjugate.
|
void arm_cmplx_conj_q15(
|
||||||
* @param *pSrc points to the input vector
|
q15_t * pSrc,
|
||||||
* @param *pDst points to the output vector
|
q15_t * pDst,
|
||||||
* @param numSamples number of complex samples in each vector
|
uint32_t numSamples)
|
||||||
* @return none.
|
{
|
||||||
*
|
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
#if defined (ARM_MATH_DSP)
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
* The Q15 value -1 (0x8000) will be saturated to the maximum allowable positive value 0x7FFF.
|
uint32_t blkCnt; /* loop counter */
|
||||||
*/
|
q31_t in1, in2, in3, in4;
|
||||||
|
q31_t zero = 0;
|
||||||
void arm_cmplx_conj_q15(
|
|
||||||
q15_t * pSrc,
|
/*loop Unrolling */
|
||||||
q15_t * pDst,
|
blkCnt = numSamples >> 2U;
|
||||||
uint32_t numSamples)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||||
uint32_t blkCnt; /* loop counter */
|
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||||
q31_t in1, in2, in3, in4;
|
in1 = *__SIMD32(pSrc)++;
|
||||||
q31_t zero = 0;
|
in2 = *__SIMD32(pSrc)++;
|
||||||
|
in3 = *__SIMD32(pSrc)++;
|
||||||
/*loop Unrolling */
|
in4 = *__SIMD32(pSrc)++;
|
||||||
blkCnt = numSamples >> 2u;
|
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
in1 = __QASX(zero, in1);
|
||||||
while(blkCnt > 0u)
|
in2 = __QASX(zero, in2);
|
||||||
{
|
in3 = __QASX(zero, in3);
|
||||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
in4 = __QASX(zero, in4);
|
||||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
|
||||||
in1 = *__SIMD32(pSrc)++;
|
#else
|
||||||
in2 = *__SIMD32(pSrc)++;
|
|
||||||
in3 = *__SIMD32(pSrc)++;
|
in1 = __QSAX(zero, in1);
|
||||||
in4 = *__SIMD32(pSrc)++;
|
in2 = __QSAX(zero, in2);
|
||||||
|
in3 = __QSAX(zero, in3);
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
in4 = __QSAX(zero, in4);
|
||||||
|
|
||||||
in1 = __QASX(zero, in1);
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
in2 = __QASX(zero, in2);
|
|
||||||
in3 = __QASX(zero, in3);
|
in1 = ((uint32_t) in1 >> 16) | ((uint32_t) in1 << 16);
|
||||||
in4 = __QASX(zero, in4);
|
in2 = ((uint32_t) in2 >> 16) | ((uint32_t) in2 << 16);
|
||||||
|
in3 = ((uint32_t) in3 >> 16) | ((uint32_t) in3 << 16);
|
||||||
#else
|
in4 = ((uint32_t) in4 >> 16) | ((uint32_t) in4 << 16);
|
||||||
|
|
||||||
in1 = __QSAX(zero, in1);
|
*__SIMD32(pDst)++ = in1;
|
||||||
in2 = __QSAX(zero, in2);
|
*__SIMD32(pDst)++ = in2;
|
||||||
in3 = __QSAX(zero, in3);
|
*__SIMD32(pDst)++ = in3;
|
||||||
in4 = __QSAX(zero, in4);
|
*__SIMD32(pDst)++ = in4;
|
||||||
|
|
||||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
in1 = ((uint32_t) in1 >> 16) | ((uint32_t) in1 << 16);
|
}
|
||||||
in2 = ((uint32_t) in2 >> 16) | ((uint32_t) in2 << 16);
|
|
||||||
in3 = ((uint32_t) in3 >> 16) | ((uint32_t) in3 << 16);
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
in4 = ((uint32_t) in4 >> 16) | ((uint32_t) in4 << 16);
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = numSamples % 0x4U;
|
||||||
*__SIMD32(pDst)++ = in1;
|
|
||||||
*__SIMD32(pDst)++ = in2;
|
while (blkCnt > 0U)
|
||||||
*__SIMD32(pDst)++ = in3;
|
{
|
||||||
*__SIMD32(pDst)++ = in4;
|
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||||
|
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||||
/* Decrement the loop counter */
|
*pDst++ = *pSrc++;
|
||||||
blkCnt--;
|
*pDst++ = __SSAT(-*pSrc++, 16);
|
||||||
}
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
blkCnt--;
|
||||||
** No loop unrolling is used. */
|
}
|
||||||
blkCnt = numSamples % 0x4u;
|
|
||||||
|
#else
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
q15_t in;
|
||||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
|
||||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
/* Run the below code for Cortex-M0 */
|
||||||
*pDst++ = *pSrc++;
|
|
||||||
*pDst++ = __SSAT(-*pSrc++, 16);
|
while (numSamples > 0U)
|
||||||
|
{
|
||||||
/* Decrement the loop counter */
|
/* realOut + j (imagOut) = realIn+ j (-1) imagIn */
|
||||||
blkCnt--;
|
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||||
}
|
*pDst++ = *pSrc++;
|
||||||
|
in = *pSrc++;
|
||||||
#else
|
*pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
|
||||||
|
|
||||||
q15_t in;
|
/* Decrement the loop counter */
|
||||||
|
numSamples--;
|
||||||
/* Run the below code for Cortex-M0 */
|
}
|
||||||
|
|
||||||
while(numSamples > 0u)
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
{
|
|
||||||
/* realOut + j (imagOut) = realIn+ j (-1) imagIn */
|
}
|
||||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
|
||||||
*pDst++ = *pSrc++;
|
/**
|
||||||
in = *pSrc++;
|
* @} end of cmplx_conj group
|
||||||
*pDst++ = (in == (q15_t) 0x8000) ? 0x7fff : -in;
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
numSamples--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_conj group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,180 +1,169 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_conj_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 complex conjugate
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_conj_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 complex conjugate.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup cmplx_conj
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* ---------------------------------------------------------------------------- */
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Q31 complex conjugate.
|
||||||
/**
|
* @param *pSrc points to the input vector
|
||||||
* @ingroup groupCmplxMath
|
* @param *pDst points to the output vector
|
||||||
*/
|
* @param numSamples number of complex samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup cmplx_conj
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function uses saturating arithmetic.
|
||||||
|
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
||||||
/**
|
*/
|
||||||
* @brief Q31 complex conjugate.
|
|
||||||
* @param *pSrc points to the input vector
|
void arm_cmplx_conj_q31(
|
||||||
* @param *pDst points to the output vector
|
q31_t * pSrc,
|
||||||
* @param numSamples number of complex samples in each vector
|
q31_t * pDst,
|
||||||
* @return none.
|
uint32_t numSamples)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t blkCnt; /* loop counter */
|
||||||
* \par
|
q31_t in; /* Input value */
|
||||||
* The function uses saturating arithmetic.
|
|
||||||
* The Q31 value -1 (0x80000000) will be saturated to the maximum allowable positive value 0x7FFFFFFF.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
void arm_cmplx_conj_q31(
|
q31_t inR1, inR2, inR3, inR4; /* Temporary real variables */
|
||||||
q31_t * pSrc,
|
q31_t inI1, inI2, inI3, inI4; /* Temporary imaginary variables */
|
||||||
q31_t * pDst,
|
|
||||||
uint32_t numSamples)
|
/*loop Unrolling */
|
||||||
{
|
blkCnt = numSamples >> 2U;
|
||||||
uint32_t blkCnt; /* loop counter */
|
|
||||||
q31_t in; /* Input value */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||||
q31_t inR1, inR2, inR3, inR4; /* Temporary real variables */
|
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||||
q31_t inI1, inI2, inI3, inI4; /* Temporary imaginary variables */
|
/* Saturated to 0x7fffffff if the input is -1(0x80000000) */
|
||||||
|
/* read real input sample */
|
||||||
/*loop Unrolling */
|
inR1 = pSrc[0];
|
||||||
blkCnt = numSamples >> 2u;
|
/* store real input sample */
|
||||||
|
pDst[0] = inR1;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* read imaginary input sample */
|
||||||
while(blkCnt > 0u)
|
inI1 = pSrc[1];
|
||||||
{
|
|
||||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
/* read real input sample */
|
||||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
inR2 = pSrc[2];
|
||||||
/* Saturated to 0x7fffffff if the input is -1(0x80000000) */
|
/* store real input sample */
|
||||||
/* read real input sample */
|
pDst[2] = inR2;
|
||||||
inR1 = pSrc[0];
|
|
||||||
/* store real input sample */
|
/* read imaginary input sample */
|
||||||
pDst[0] = inR1;
|
inI2 = pSrc[3];
|
||||||
|
|
||||||
/* read imaginary input sample */
|
/* negate imaginary input sample */
|
||||||
inI1 = pSrc[1];
|
inI1 = __QSUB(0, inI1);
|
||||||
|
|
||||||
/* read real input sample */
|
/* read real input sample */
|
||||||
inR2 = pSrc[2];
|
inR3 = pSrc[4];
|
||||||
/* store real input sample */
|
/* store real input sample */
|
||||||
pDst[2] = inR2;
|
pDst[4] = inR3;
|
||||||
|
|
||||||
/* read imaginary input sample */
|
/* read imaginary input sample */
|
||||||
inI2 = pSrc[3];
|
inI3 = pSrc[5];
|
||||||
|
|
||||||
/* negate imaginary input sample */
|
/* negate imaginary input sample */
|
||||||
inI1 = __QSUB(0, inI1);
|
inI2 = __QSUB(0, inI2);
|
||||||
|
|
||||||
/* read real input sample */
|
/* read real input sample */
|
||||||
inR3 = pSrc[4];
|
inR4 = pSrc[6];
|
||||||
/* store real input sample */
|
/* store real input sample */
|
||||||
pDst[4] = inR3;
|
pDst[6] = inR4;
|
||||||
|
|
||||||
/* read imaginary input sample */
|
/* negate imaginary input sample */
|
||||||
inI3 = pSrc[5];
|
inI3 = __QSUB(0, inI3);
|
||||||
|
|
||||||
/* negate imaginary input sample */
|
/* read imaginary input sample */
|
||||||
inI2 = __QSUB(0, inI2);
|
inI4 = pSrc[7];
|
||||||
|
|
||||||
/* read real input sample */
|
/* store imaginary input samples */
|
||||||
inR4 = pSrc[6];
|
pDst[1] = inI1;
|
||||||
/* store real input sample */
|
|
||||||
pDst[6] = inR4;
|
/* negate imaginary input sample */
|
||||||
|
inI4 = __QSUB(0, inI4);
|
||||||
/* negate imaginary input sample */
|
|
||||||
inI3 = __QSUB(0, inI3);
|
/* store imaginary input samples */
|
||||||
|
pDst[3] = inI2;
|
||||||
/* read imaginary input sample */
|
|
||||||
inI4 = pSrc[7];
|
/* increment source pointer by 8 to process next samples */
|
||||||
|
pSrc += 8U;
|
||||||
/* store imaginary input samples */
|
|
||||||
pDst[1] = inI1;
|
/* store imaginary input samples */
|
||||||
|
pDst[5] = inI3;
|
||||||
/* negate imaginary input sample */
|
pDst[7] = inI4;
|
||||||
inI4 = __QSUB(0, inI4);
|
|
||||||
|
/* increment destination pointer by 8 to process next samples */
|
||||||
/* store imaginary input samples */
|
pDst += 8U;
|
||||||
pDst[3] = inI2;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* increment source pointer by 8 to process next samples */
|
blkCnt--;
|
||||||
pSrc += 8u;
|
}
|
||||||
|
|
||||||
/* store imaginary input samples */
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
pDst[5] = inI3;
|
** No loop unrolling is used. */
|
||||||
pDst[7] = inI4;
|
blkCnt = numSamples % 0x4U;
|
||||||
|
|
||||||
/* increment destination pointer by 8 to process next samples */
|
#else
|
||||||
pDst += 8u;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
/* Decrement the loop counter */
|
blkCnt = numSamples;
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = numSamples % 0x4u;
|
{
|
||||||
|
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
||||||
#else
|
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
||||||
|
/* Saturated to 0x7fffffff if the input is -1(0x80000000) */
|
||||||
/* Run the below code for Cortex-M0 */
|
*pDst++ = *pSrc++;
|
||||||
blkCnt = numSamples;
|
in = *pSrc++;
|
||||||
|
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
}
|
||||||
/* C[0]+jC[1] = A[0]+ j (-1) A[1] */
|
|
||||||
/* Calculate Complex Conjugate and then store the results in the destination buffer. */
|
/**
|
||||||
/* Saturated to 0x7fffffff if the input is -1(0x80000000) */
|
* @} end of cmplx_conj group
|
||||||
*pDst++ = *pSrc++;
|
*/
|
||||||
in = *pSrc++;
|
|
||||||
*pDst++ = (in == INT32_MIN) ? INT32_MAX : -in;
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_conj group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,203 +1,191 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_dot_prod_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point complex dot product
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_dot_prod_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point complex dot product
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup cmplx_dot_prod Complex Dot Product
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Computes the dot product of two complex vectors.
|
||||||
* ---------------------------------------------------------------------------- */
|
* The vectors are multiplied element-by-element and then summed.
|
||||||
|
*
|
||||||
#include "arm_math.h"
|
* The <code>pSrcA</code> points to the first complex input vector and
|
||||||
|
* <code>pSrcB</code> points to the second complex input vector.
|
||||||
/**
|
* <code>numSamples</code> specifies the number of complex samples
|
||||||
* @ingroup groupCmplxMath
|
* and the data in each array is stored in an interleaved fashion
|
||||||
*/
|
* (real, imag, real, imag, ...).
|
||||||
|
* Each array has a total of <code>2*numSamples</code> values.
|
||||||
/**
|
*
|
||||||
* @defgroup cmplx_dot_prod Complex Dot Product
|
* The underlying algorithm is used:
|
||||||
*
|
* <pre>
|
||||||
* Computes the dot product of two complex vectors.
|
* realResult=0;
|
||||||
* The vectors are multiplied element-by-element and then summed.
|
* imagResult=0;
|
||||||
*
|
* for(n=0; n<numSamples; n++) {
|
||||||
* The <code>pSrcA</code> points to the first complex input vector and
|
* realResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+0] - pSrcA[(2*n)+1]*pSrcB[(2*n)+1];
|
||||||
* <code>pSrcB</code> points to the second complex input vector.
|
* imagResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+1] + pSrcA[(2*n)+1]*pSrcB[(2*n)+0];
|
||||||
* <code>numSamples</code> specifies the number of complex samples
|
* }
|
||||||
* and the data in each array is stored in an interleaved fashion
|
* </pre>
|
||||||
* (real, imag, real, imag, ...).
|
*
|
||||||
* Each array has a total of <code>2*numSamples</code> values.
|
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||||
*
|
*/
|
||||||
* The underlying algorithm is used:
|
|
||||||
* <pre>
|
/**
|
||||||
* realResult=0;
|
* @addtogroup cmplx_dot_prod
|
||||||
* imagResult=0;
|
* @{
|
||||||
* for(n=0; n<numSamples; n++) {
|
*/
|
||||||
* realResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+0] - pSrcA[(2*n)+1]*pSrcB[(2*n)+1];
|
|
||||||
* imagResult += pSrcA[(2*n)+0]*pSrcB[(2*n)+1] + pSrcA[(2*n)+1]*pSrcB[(2*n)+0];
|
/**
|
||||||
* }
|
* @brief Floating-point complex dot product
|
||||||
* </pre>
|
* @param *pSrcA points to the first input vector
|
||||||
*
|
* @param *pSrcB points to the second input vector
|
||||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
* @param numSamples number of complex samples in each vector
|
||||||
*/
|
* @param *realResult real part of the result returned here
|
||||||
|
* @param *imagResult imaginary part of the result returned here
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup cmplx_dot_prod
|
*/
|
||||||
* @{
|
|
||||||
*/
|
void arm_cmplx_dot_prod_f32(
|
||||||
|
float32_t * pSrcA,
|
||||||
/**
|
float32_t * pSrcB,
|
||||||
* @brief Floating-point complex dot product
|
uint32_t numSamples,
|
||||||
* @param *pSrcA points to the first input vector
|
float32_t * realResult,
|
||||||
* @param *pSrcB points to the second input vector
|
float32_t * imagResult)
|
||||||
* @param numSamples number of complex samples in each vector
|
{
|
||||||
* @param *realResult real part of the result returned here
|
float32_t real_sum = 0.0f, imag_sum = 0.0f; /* Temporary result storage */
|
||||||
* @param *imagResult imaginary part of the result returned here
|
float32_t a0,b0,c0,d0;
|
||||||
* @return none.
|
|
||||||
*/
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
void arm_cmplx_dot_prod_f32(
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
float32_t * pSrcA,
|
uint32_t blkCnt; /* loop counter */
|
||||||
float32_t * pSrcB,
|
|
||||||
uint32_t numSamples,
|
/*loop Unrolling */
|
||||||
float32_t * realResult,
|
blkCnt = numSamples >> 2U;
|
||||||
float32_t * imagResult)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
float32_t real_sum = 0.0f, imag_sum = 0.0f; /* Temporary result storage */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
float32_t a0,b0,c0,d0;
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
c0 = *pSrcB++;
|
||||||
uint32_t blkCnt; /* loop counter */
|
d0 = *pSrcB++;
|
||||||
|
|
||||||
/*loop Unrolling */
|
real_sum += a0 * c0;
|
||||||
blkCnt = numSamples >> 2u;
|
imag_sum += a0 * d0;
|
||||||
|
real_sum -= b0 * d0;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
imag_sum += b0 * c0;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
a0 = *pSrcA++;
|
||||||
{
|
b0 = *pSrcA++;
|
||||||
a0 = *pSrcA++;
|
c0 = *pSrcB++;
|
||||||
b0 = *pSrcA++;
|
d0 = *pSrcB++;
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
real_sum += a0 * c0;
|
||||||
|
imag_sum += a0 * d0;
|
||||||
real_sum += a0 * c0;
|
real_sum -= b0 * d0;
|
||||||
imag_sum += a0 * d0;
|
imag_sum += b0 * c0;
|
||||||
real_sum -= b0 * d0;
|
|
||||||
imag_sum += b0 * c0;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
a0 = *pSrcA++;
|
c0 = *pSrcB++;
|
||||||
b0 = *pSrcA++;
|
d0 = *pSrcB++;
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
real_sum += a0 * c0;
|
||||||
|
imag_sum += a0 * d0;
|
||||||
real_sum += a0 * c0;
|
real_sum -= b0 * d0;
|
||||||
imag_sum += a0 * d0;
|
imag_sum += b0 * c0;
|
||||||
real_sum -= b0 * d0;
|
|
||||||
imag_sum += b0 * c0;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
a0 = *pSrcA++;
|
c0 = *pSrcB++;
|
||||||
b0 = *pSrcA++;
|
d0 = *pSrcB++;
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
real_sum += a0 * c0;
|
||||||
|
imag_sum += a0 * d0;
|
||||||
real_sum += a0 * c0;
|
real_sum -= b0 * d0;
|
||||||
imag_sum += a0 * d0;
|
imag_sum += b0 * c0;
|
||||||
real_sum -= b0 * d0;
|
|
||||||
imag_sum += b0 * c0;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
a0 = *pSrcA++;
|
}
|
||||||
b0 = *pSrcA++;
|
|
||||||
c0 = *pSrcB++;
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
d0 = *pSrcB++;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = numSamples & 0x3U;
|
||||||
real_sum += a0 * c0;
|
|
||||||
imag_sum += a0 * d0;
|
while (blkCnt > 0U)
|
||||||
real_sum -= b0 * d0;
|
{
|
||||||
imag_sum += b0 * c0;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
/* Decrement the loop counter */
|
c0 = *pSrcB++;
|
||||||
blkCnt--;
|
d0 = *pSrcB++;
|
||||||
}
|
|
||||||
|
real_sum += a0 * c0;
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
imag_sum += a0 * d0;
|
||||||
** No loop unrolling is used. */
|
real_sum -= b0 * d0;
|
||||||
blkCnt = numSamples & 0x3u;
|
imag_sum += b0 * c0;
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
blkCnt--;
|
||||||
a0 = *pSrcA++;
|
}
|
||||||
b0 = *pSrcA++;
|
|
||||||
c0 = *pSrcB++;
|
#else
|
||||||
d0 = *pSrcB++;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
real_sum += a0 * c0;
|
|
||||||
imag_sum += a0 * d0;
|
while (numSamples > 0U)
|
||||||
real_sum -= b0 * d0;
|
{
|
||||||
imag_sum += b0 * c0;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
/* Decrement the loop counter */
|
c0 = *pSrcB++;
|
||||||
blkCnt--;
|
d0 = *pSrcB++;
|
||||||
}
|
|
||||||
|
real_sum += a0 * c0;
|
||||||
#else
|
imag_sum += a0 * d0;
|
||||||
|
real_sum -= b0 * d0;
|
||||||
/* Run the below code for Cortex-M0 */
|
imag_sum += b0 * c0;
|
||||||
|
|
||||||
while(numSamples > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
numSamples--;
|
||||||
a0 = *pSrcA++;
|
}
|
||||||
b0 = *pSrcA++;
|
|
||||||
c0 = *pSrcB++;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
d0 = *pSrcB++;
|
|
||||||
|
/* Store the real and imaginary results in the destination buffers */
|
||||||
real_sum += a0 * c0;
|
*realResult = real_sum;
|
||||||
imag_sum += a0 * d0;
|
*imagResult = imag_sum;
|
||||||
real_sum -= b0 * d0;
|
}
|
||||||
imag_sum += b0 * c0;
|
|
||||||
|
/**
|
||||||
/* Decrement the loop counter */
|
* @} end of cmplx_dot_prod group
|
||||||
numSamples--;
|
*/
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
/* Store the real and imaginary results in the destination buffers */
|
|
||||||
*realResult = real_sum;
|
|
||||||
*imagResult = imag_sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_dot_prod group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,189 +1,177 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_dot_prod_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Processing function for the Q15 Complex Dot product
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_dot_prod_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Processing function for the Q15 Complex Dot product
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup cmplx_dot_prod
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q15 complex dot product
|
||||||
|
* @param *pSrcA points to the first input vector
|
||||||
/**
|
* @param *pSrcB points to the second input vector
|
||||||
* @ingroup groupCmplxMath
|
* @param numSamples number of complex samples in each vector
|
||||||
*/
|
* @param *realResult real part of the result returned here
|
||||||
|
* @param *imagResult imaginary part of the result returned here
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup cmplx_dot_prod
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The function is implemented using an internal 64-bit accumulator.
|
||||||
/**
|
* The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result.
|
||||||
* @brief Q15 complex dot product
|
* These are accumulated in a 64-bit accumulator with 34.30 precision.
|
||||||
* @param *pSrcA points to the first input vector
|
* As a final step, the accumulators are converted to 8.24 format.
|
||||||
* @param *pSrcB points to the second input vector
|
* The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format.
|
||||||
* @param numSamples number of complex samples in each vector
|
*/
|
||||||
* @param *realResult real part of the result returned here
|
|
||||||
* @param *imagResult imaginary part of the result returned here
|
void arm_cmplx_dot_prod_q15(
|
||||||
* @return none.
|
q15_t * pSrcA,
|
||||||
*
|
q15_t * pSrcB,
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
uint32_t numSamples,
|
||||||
* \par
|
q31_t * realResult,
|
||||||
* The function is implemented using an internal 64-bit accumulator.
|
q31_t * imagResult)
|
||||||
* The intermediate 1.15 by 1.15 multiplications are performed with full precision and yield a 2.30 result.
|
{
|
||||||
* These are accumulated in a 64-bit accumulator with 34.30 precision.
|
q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
|
||||||
* As a final step, the accumulators are converted to 8.24 format.
|
q15_t a0,b0,c0,d0;
|
||||||
* The return results <code>realResult</code> and <code>imagResult</code> are in 8.24 format.
|
|
||||||
*/
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
void arm_cmplx_dot_prod_q15(
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
q15_t * pSrcA,
|
uint32_t blkCnt; /* loop counter */
|
||||||
q15_t * pSrcB,
|
|
||||||
uint32_t numSamples,
|
|
||||||
q31_t * realResult,
|
/*loop Unrolling */
|
||||||
q31_t * imagResult)
|
blkCnt = numSamples >> 2U;
|
||||||
{
|
|
||||||
q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
q15_t a0,b0,c0,d0;
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
a0 = *pSrcA++;
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
b0 = *pSrcA++;
|
||||||
uint32_t blkCnt; /* loop counter */
|
c0 = *pSrcB++;
|
||||||
|
d0 = *pSrcB++;
|
||||||
|
|
||||||
/*loop Unrolling */
|
real_sum += (q31_t)a0 * c0;
|
||||||
blkCnt = numSamples >> 2u;
|
imag_sum += (q31_t)a0 * d0;
|
||||||
|
real_sum -= (q31_t)b0 * d0;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
imag_sum += (q31_t)b0 * c0;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
a0 = *pSrcA++;
|
||||||
{
|
b0 = *pSrcA++;
|
||||||
a0 = *pSrcA++;
|
c0 = *pSrcB++;
|
||||||
b0 = *pSrcA++;
|
d0 = *pSrcB++;
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
real_sum += (q31_t)a0 * c0;
|
||||||
|
imag_sum += (q31_t)a0 * d0;
|
||||||
real_sum += (q31_t)a0 * c0;
|
real_sum -= (q31_t)b0 * d0;
|
||||||
imag_sum += (q31_t)a0 * d0;
|
imag_sum += (q31_t)b0 * c0;
|
||||||
real_sum -= (q31_t)b0 * d0;
|
|
||||||
imag_sum += (q31_t)b0 * c0;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
a0 = *pSrcA++;
|
c0 = *pSrcB++;
|
||||||
b0 = *pSrcA++;
|
d0 = *pSrcB++;
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
real_sum += (q31_t)a0 * c0;
|
||||||
|
imag_sum += (q31_t)a0 * d0;
|
||||||
real_sum += (q31_t)a0 * c0;
|
real_sum -= (q31_t)b0 * d0;
|
||||||
imag_sum += (q31_t)a0 * d0;
|
imag_sum += (q31_t)b0 * c0;
|
||||||
real_sum -= (q31_t)b0 * d0;
|
|
||||||
imag_sum += (q31_t)b0 * c0;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
a0 = *pSrcA++;
|
c0 = *pSrcB++;
|
||||||
b0 = *pSrcA++;
|
d0 = *pSrcB++;
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
real_sum += (q31_t)a0 * c0;
|
||||||
|
imag_sum += (q31_t)a0 * d0;
|
||||||
real_sum += (q31_t)a0 * c0;
|
real_sum -= (q31_t)b0 * d0;
|
||||||
imag_sum += (q31_t)a0 * d0;
|
imag_sum += (q31_t)b0 * c0;
|
||||||
real_sum -= (q31_t)b0 * d0;
|
|
||||||
imag_sum += (q31_t)b0 * c0;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
a0 = *pSrcA++;
|
}
|
||||||
b0 = *pSrcA++;
|
|
||||||
c0 = *pSrcB++;
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
d0 = *pSrcB++;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = numSamples % 0x4U;
|
||||||
real_sum += (q31_t)a0 * c0;
|
|
||||||
imag_sum += (q31_t)a0 * d0;
|
while (blkCnt > 0U)
|
||||||
real_sum -= (q31_t)b0 * d0;
|
{
|
||||||
imag_sum += (q31_t)b0 * c0;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
/* Decrement the loop counter */
|
c0 = *pSrcB++;
|
||||||
blkCnt--;
|
d0 = *pSrcB++;
|
||||||
}
|
|
||||||
|
real_sum += (q31_t)a0 * c0;
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
imag_sum += (q31_t)a0 * d0;
|
||||||
** No loop unrolling is used. */
|
real_sum -= (q31_t)b0 * d0;
|
||||||
blkCnt = numSamples % 0x4u;
|
imag_sum += (q31_t)b0 * c0;
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
blkCnt--;
|
||||||
a0 = *pSrcA++;
|
}
|
||||||
b0 = *pSrcA++;
|
|
||||||
c0 = *pSrcB++;
|
#else
|
||||||
d0 = *pSrcB++;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
real_sum += (q31_t)a0 * c0;
|
|
||||||
imag_sum += (q31_t)a0 * d0;
|
while (numSamples > 0U)
|
||||||
real_sum -= (q31_t)b0 * d0;
|
{
|
||||||
imag_sum += (q31_t)b0 * c0;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
/* Decrement the loop counter */
|
c0 = *pSrcB++;
|
||||||
blkCnt--;
|
d0 = *pSrcB++;
|
||||||
}
|
|
||||||
|
real_sum += a0 * c0;
|
||||||
#else
|
imag_sum += a0 * d0;
|
||||||
|
real_sum -= b0 * d0;
|
||||||
/* Run the below code for Cortex-M0 */
|
imag_sum += b0 * c0;
|
||||||
|
|
||||||
while(numSamples > 0u)
|
|
||||||
{
|
/* Decrement the loop counter */
|
||||||
a0 = *pSrcA++;
|
numSamples--;
|
||||||
b0 = *pSrcA++;
|
}
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
real_sum += a0 * c0;
|
/* Store the real and imaginary results in 8.24 format */
|
||||||
imag_sum += a0 * d0;
|
/* Convert real data in 34.30 to 8.24 by 6 right shifts */
|
||||||
real_sum -= b0 * d0;
|
*realResult = (q31_t) (real_sum >> 6);
|
||||||
imag_sum += b0 * c0;
|
/* Convert imaginary data in 34.30 to 8.24 by 6 right shifts */
|
||||||
|
*imagResult = (q31_t) (imag_sum >> 6);
|
||||||
|
}
|
||||||
/* Decrement the loop counter */
|
|
||||||
numSamples--;
|
/**
|
||||||
}
|
* @} end of cmplx_dot_prod group
|
||||||
|
*/
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
/* Store the real and imaginary results in 8.24 format */
|
|
||||||
/* Convert real data in 34.30 to 8.24 by 6 right shifts */
|
|
||||||
*realResult = (q31_t) (real_sum >> 6);
|
|
||||||
/* Convert imaginary data in 34.30 to 8.24 by 6 right shifts */
|
|
||||||
*imagResult = (q31_t) (imag_sum >> 6);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_dot_prod group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,187 +1,175 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_dot_prod_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 complex dot product
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_dot_prod_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 complex dot product
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup cmplx_dot_prod
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q31 complex dot product
|
||||||
|
* @param *pSrcA points to the first input vector
|
||||||
/**
|
* @param *pSrcB points to the second input vector
|
||||||
* @ingroup groupCmplxMath
|
* @param numSamples number of complex samples in each vector
|
||||||
*/
|
* @param *realResult real part of the result returned here
|
||||||
|
* @param *imagResult imaginary part of the result returned here
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup cmplx_dot_prod
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The function is implemented using an internal 64-bit accumulator.
|
||||||
/**
|
* The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format.
|
||||||
* @brief Q31 complex dot product
|
* The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits.
|
||||||
* @param *pSrcA points to the first input vector
|
* Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768.
|
||||||
* @param *pSrcB points to the second input vector
|
* The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format.
|
||||||
* @param numSamples number of complex samples in each vector
|
* Input down scaling is not required.
|
||||||
* @param *realResult real part of the result returned here
|
*/
|
||||||
* @param *imagResult imaginary part of the result returned here
|
|
||||||
* @return none.
|
void arm_cmplx_dot_prod_q31(
|
||||||
*
|
q31_t * pSrcA,
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q31_t * pSrcB,
|
||||||
* \par
|
uint32_t numSamples,
|
||||||
* The function is implemented using an internal 64-bit accumulator.
|
q63_t * realResult,
|
||||||
* The intermediate 1.31 by 1.31 multiplications are performed with 64-bit precision and then shifted to 16.48 format.
|
q63_t * imagResult)
|
||||||
* The internal real and imaginary accumulators are in 16.48 format and provide 15 guard bits.
|
{
|
||||||
* Additions are nonsaturating and no overflow will occur as long as <code>numSamples</code> is less than 32768.
|
q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
|
||||||
* The return results <code>realResult</code> and <code>imagResult</code> are in 16.48 format.
|
q31_t a0,b0,c0,d0;
|
||||||
* Input down scaling is not required.
|
|
||||||
*/
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
void arm_cmplx_dot_prod_q31(
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
q31_t * pSrcA,
|
uint32_t blkCnt; /* loop counter */
|
||||||
q31_t * pSrcB,
|
|
||||||
uint32_t numSamples,
|
|
||||||
q63_t * realResult,
|
/*loop Unrolling */
|
||||||
q63_t * imagResult)
|
blkCnt = numSamples >> 2U;
|
||||||
{
|
|
||||||
q63_t real_sum = 0, imag_sum = 0; /* Temporary result storage */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
q31_t a0,b0,c0,d0;
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
{
|
||||||
|
a0 = *pSrcA++;
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
b0 = *pSrcA++;
|
||||||
uint32_t blkCnt; /* loop counter */
|
c0 = *pSrcB++;
|
||||||
|
d0 = *pSrcB++;
|
||||||
|
|
||||||
/*loop Unrolling */
|
real_sum += ((q63_t)a0 * c0) >> 14;
|
||||||
blkCnt = numSamples >> 2u;
|
imag_sum += ((q63_t)a0 * d0) >> 14;
|
||||||
|
real_sum -= ((q63_t)b0 * d0) >> 14;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
imag_sum += ((q63_t)b0 * c0) >> 14;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
a0 = *pSrcA++;
|
||||||
{
|
b0 = *pSrcA++;
|
||||||
a0 = *pSrcA++;
|
c0 = *pSrcB++;
|
||||||
b0 = *pSrcA++;
|
d0 = *pSrcB++;
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
real_sum += ((q63_t)a0 * c0) >> 14;
|
||||||
|
imag_sum += ((q63_t)a0 * d0) >> 14;
|
||||||
real_sum += ((q63_t)a0 * c0) >> 14;
|
real_sum -= ((q63_t)b0 * d0) >> 14;
|
||||||
imag_sum += ((q63_t)a0 * d0) >> 14;
|
imag_sum += ((q63_t)b0 * c0) >> 14;
|
||||||
real_sum -= ((q63_t)b0 * d0) >> 14;
|
|
||||||
imag_sum += ((q63_t)b0 * c0) >> 14;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
a0 = *pSrcA++;
|
c0 = *pSrcB++;
|
||||||
b0 = *pSrcA++;
|
d0 = *pSrcB++;
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
real_sum += ((q63_t)a0 * c0) >> 14;
|
||||||
|
imag_sum += ((q63_t)a0 * d0) >> 14;
|
||||||
real_sum += ((q63_t)a0 * c0) >> 14;
|
real_sum -= ((q63_t)b0 * d0) >> 14;
|
||||||
imag_sum += ((q63_t)a0 * d0) >> 14;
|
imag_sum += ((q63_t)b0 * c0) >> 14;
|
||||||
real_sum -= ((q63_t)b0 * d0) >> 14;
|
|
||||||
imag_sum += ((q63_t)b0 * c0) >> 14;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
a0 = *pSrcA++;
|
c0 = *pSrcB++;
|
||||||
b0 = *pSrcA++;
|
d0 = *pSrcB++;
|
||||||
c0 = *pSrcB++;
|
|
||||||
d0 = *pSrcB++;
|
real_sum += ((q63_t)a0 * c0) >> 14;
|
||||||
|
imag_sum += ((q63_t)a0 * d0) >> 14;
|
||||||
real_sum += ((q63_t)a0 * c0) >> 14;
|
real_sum -= ((q63_t)b0 * d0) >> 14;
|
||||||
imag_sum += ((q63_t)a0 * d0) >> 14;
|
imag_sum += ((q63_t)b0 * c0) >> 14;
|
||||||
real_sum -= ((q63_t)b0 * d0) >> 14;
|
|
||||||
imag_sum += ((q63_t)b0 * c0) >> 14;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
a0 = *pSrcA++;
|
}
|
||||||
b0 = *pSrcA++;
|
|
||||||
c0 = *pSrcB++;
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
d0 = *pSrcB++;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = numSamples % 0x4U;
|
||||||
real_sum += ((q63_t)a0 * c0) >> 14;
|
|
||||||
imag_sum += ((q63_t)a0 * d0) >> 14;
|
while (blkCnt > 0U)
|
||||||
real_sum -= ((q63_t)b0 * d0) >> 14;
|
{
|
||||||
imag_sum += ((q63_t)b0 * c0) >> 14;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
/* Decrement the loop counter */
|
c0 = *pSrcB++;
|
||||||
blkCnt--;
|
d0 = *pSrcB++;
|
||||||
}
|
|
||||||
|
real_sum += ((q63_t)a0 * c0) >> 14;
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
imag_sum += ((q63_t)a0 * d0) >> 14;
|
||||||
** No loop unrolling is used. */
|
real_sum -= ((q63_t)b0 * d0) >> 14;
|
||||||
blkCnt = numSamples % 0x4u;
|
imag_sum += ((q63_t)b0 * c0) >> 14;
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
blkCnt--;
|
||||||
a0 = *pSrcA++;
|
}
|
||||||
b0 = *pSrcA++;
|
|
||||||
c0 = *pSrcB++;
|
#else
|
||||||
d0 = *pSrcB++;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
real_sum += ((q63_t)a0 * c0) >> 14;
|
|
||||||
imag_sum += ((q63_t)a0 * d0) >> 14;
|
while (numSamples > 0U)
|
||||||
real_sum -= ((q63_t)b0 * d0) >> 14;
|
{
|
||||||
imag_sum += ((q63_t)b0 * c0) >> 14;
|
a0 = *pSrcA++;
|
||||||
|
b0 = *pSrcA++;
|
||||||
/* Decrement the loop counter */
|
c0 = *pSrcB++;
|
||||||
blkCnt--;
|
d0 = *pSrcB++;
|
||||||
}
|
|
||||||
|
real_sum += ((q63_t)a0 * c0) >> 14;
|
||||||
#else
|
imag_sum += ((q63_t)a0 * d0) >> 14;
|
||||||
|
real_sum -= ((q63_t)b0 * d0) >> 14;
|
||||||
/* Run the below code for Cortex-M0 */
|
imag_sum += ((q63_t)b0 * c0) >> 14;
|
||||||
|
|
||||||
while(numSamples > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
numSamples--;
|
||||||
a0 = *pSrcA++;
|
}
|
||||||
b0 = *pSrcA++;
|
|
||||||
c0 = *pSrcB++;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
d0 = *pSrcB++;
|
|
||||||
|
/* Store the real and imaginary results in 16.48 format */
|
||||||
real_sum += ((q63_t)a0 * c0) >> 14;
|
*realResult = real_sum;
|
||||||
imag_sum += ((q63_t)a0 * d0) >> 14;
|
*imagResult = imag_sum;
|
||||||
real_sum -= ((q63_t)b0 * d0) >> 14;
|
}
|
||||||
imag_sum += ((q63_t)b0 * c0) >> 14;
|
|
||||||
|
/**
|
||||||
/* Decrement the loop counter */
|
* @} end of cmplx_dot_prod group
|
||||||
numSamples--;
|
*/
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
/* Store the real and imaginary results in 16.48 format */
|
|
||||||
*realResult = real_sum;
|
|
||||||
*imagResult = imag_sum;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_dot_prod group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,165 +1,153 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mag_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point complex magnitude
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mag_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point complex magnitude.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup cmplx_mag Complex Magnitude
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Computes the magnitude of the elements of a complex data vector.
|
||||||
* ---------------------------------------------------------------------------- */
|
*
|
||||||
|
* The <code>pSrc</code> points to the source data and
|
||||||
#include "arm_math.h"
|
* <code>pDst</code> points to the where the result should be written.
|
||||||
|
* <code>numSamples</code> specifies the number of complex samples
|
||||||
/**
|
* in the input array and the data is stored in an interleaved fashion
|
||||||
* @ingroup groupCmplxMath
|
* (real, imag, real, imag, ...).
|
||||||
*/
|
* The input array has a total of <code>2*numSamples</code> values;
|
||||||
|
* the output array has a total of <code>numSamples</code> values.
|
||||||
/**
|
* The underlying algorithm is used:
|
||||||
* @defgroup cmplx_mag Complex Magnitude
|
*
|
||||||
*
|
* <pre>
|
||||||
* Computes the magnitude of the elements of a complex data vector.
|
* for(n=0; n<numSamples; n++) {
|
||||||
*
|
* pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
|
||||||
* The <code>pSrc</code> points to the source data and
|
* }
|
||||||
* <code>pDst</code> points to the where the result should be written.
|
* </pre>
|
||||||
* <code>numSamples</code> specifies the number of complex samples
|
*
|
||||||
* in the input array and the data is stored in an interleaved fashion
|
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||||
* (real, imag, real, imag, ...).
|
*/
|
||||||
* The input array has a total of <code>2*numSamples</code> values;
|
|
||||||
* the output array has a total of <code>numSamples</code> values.
|
/**
|
||||||
* The underlying algorithm is used:
|
* @addtogroup cmplx_mag
|
||||||
*
|
* @{
|
||||||
* <pre>
|
*/
|
||||||
* for(n=0; n<numSamples; n++) {
|
/**
|
||||||
* pDst[n] = sqrt(pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2);
|
* @brief Floating-point complex magnitude.
|
||||||
* }
|
* @param[in] *pSrc points to complex input buffer
|
||||||
* </pre>
|
* @param[out] *pDst points to real output buffer
|
||||||
*
|
* @param[in] numSamples number of complex samples in the input vector
|
||||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
* @return none.
|
||||||
*/
|
*
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @addtogroup cmplx_mag
|
|
||||||
* @{
|
void arm_cmplx_mag_f32(
|
||||||
*/
|
float32_t * pSrc,
|
||||||
/**
|
float32_t * pDst,
|
||||||
* @brief Floating-point complex magnitude.
|
uint32_t numSamples)
|
||||||
* @param[in] *pSrc points to complex input buffer
|
{
|
||||||
* @param[out] *pDst points to real output buffer
|
float32_t realIn, imagIn; /* Temporary variables to hold input values */
|
||||||
* @param[in] numSamples number of complex samples in the input vector
|
|
||||||
* @return none.
|
#if defined (ARM_MATH_DSP)
|
||||||
*
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
uint32_t blkCnt; /* loop counter */
|
||||||
|
|
||||||
void arm_cmplx_mag_f32(
|
/*loop Unrolling */
|
||||||
float32_t * pSrc,
|
blkCnt = numSamples >> 2U;
|
||||||
float32_t * pDst,
|
|
||||||
uint32_t numSamples)
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
{
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
float32_t realIn, imagIn; /* Temporary variables to hold input values */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
|
||||||
|
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
realIn = *pSrc++;
|
||||||
uint32_t blkCnt; /* loop counter */
|
imagIn = *pSrc++;
|
||||||
|
/* store the result in the destination buffer. */
|
||||||
/*loop Unrolling */
|
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||||
blkCnt = numSamples >> 2u;
|
|
||||||
|
realIn = *pSrc++;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
imagIn = *pSrc++;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
realIn = *pSrc++;
|
||||||
|
imagIn = *pSrc++;
|
||||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||||
realIn = *pSrc++;
|
|
||||||
imagIn = *pSrc++;
|
realIn = *pSrc++;
|
||||||
/* store the result in the destination buffer. */
|
imagIn = *pSrc++;
|
||||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||||
|
|
||||||
realIn = *pSrc++;
|
|
||||||
imagIn = *pSrc++;
|
/* Decrement the loop counter */
|
||||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
blkCnt--;
|
||||||
|
}
|
||||||
realIn = *pSrc++;
|
|
||||||
imagIn = *pSrc++;
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = numSamples % 0x4U;
|
||||||
realIn = *pSrc++;
|
|
||||||
imagIn = *pSrc++;
|
while (blkCnt > 0U)
|
||||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
{
|
||||||
|
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||||
|
realIn = *pSrc++;
|
||||||
/* Decrement the loop counter */
|
imagIn = *pSrc++;
|
||||||
blkCnt--;
|
/* store the result in the destination buffer. */
|
||||||
}
|
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||||
|
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
/* Decrement the loop counter */
|
||||||
** No loop unrolling is used. */
|
blkCnt--;
|
||||||
blkCnt = numSamples % 0x4u;
|
}
|
||||||
|
|
||||||
while(blkCnt > 0u)
|
#else
|
||||||
{
|
|
||||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
/* Run the below code for Cortex-M0 */
|
||||||
realIn = *pSrc++;
|
|
||||||
imagIn = *pSrc++;
|
while (numSamples > 0U)
|
||||||
/* store the result in the destination buffer. */
|
{
|
||||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
/* out = sqrt((real * real) + (imag * imag)) */
|
||||||
|
realIn = *pSrc++;
|
||||||
/* Decrement the loop counter */
|
imagIn = *pSrc++;
|
||||||
blkCnt--;
|
/* store the result in the destination buffer. */
|
||||||
}
|
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
||||||
|
|
||||||
#else
|
/* Decrement the loop counter */
|
||||||
|
numSamples--;
|
||||||
/* Run the below code for Cortex-M0 */
|
}
|
||||||
|
|
||||||
while(numSamples > 0u)
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
{
|
|
||||||
/* out = sqrt((real * real) + (imag * imag)) */
|
}
|
||||||
realIn = *pSrc++;
|
|
||||||
imagIn = *pSrc++;
|
/**
|
||||||
/* store the result in the destination buffer. */
|
* @} end of cmplx_mag group
|
||||||
arm_sqrt_f32((realIn * realIn) + (imagIn * imagIn), pDst++);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
numSamples--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_mag group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,153 +1,141 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mag_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 complex magnitude
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mag_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 complex magnitude.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup cmplx_mag
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* ---------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Q15 complex magnitude
|
||||||
/**
|
* @param *pSrc points to the complex input vector
|
||||||
* @ingroup groupCmplxMath
|
* @param *pDst points to the real output vector
|
||||||
*/
|
* @param numSamples number of complex samples in the input vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup cmplx_mag
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
|
||||||
|
*/
|
||||||
|
|
||||||
/**
|
void arm_cmplx_mag_q15(
|
||||||
* @brief Q15 complex magnitude
|
q15_t * pSrc,
|
||||||
* @param *pSrc points to the complex input vector
|
q15_t * pDst,
|
||||||
* @param *pDst points to the real output vector
|
uint32_t numSamples)
|
||||||
* @param numSamples number of complex samples in the input vector
|
{
|
||||||
* @return none.
|
q31_t acc0, acc1; /* Accumulators */
|
||||||
*
|
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
#if defined (ARM_MATH_DSP)
|
||||||
* \par
|
|
||||||
* The function implements 1.15 by 1.15 multiplications and finally output is converted into 2.14 format.
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
*/
|
uint32_t blkCnt; /* loop counter */
|
||||||
|
q31_t in1, in2, in3, in4;
|
||||||
void arm_cmplx_mag_q15(
|
q31_t acc2, acc3;
|
||||||
q15_t * pSrc,
|
|
||||||
q15_t * pDst,
|
|
||||||
uint32_t numSamples)
|
/*loop Unrolling */
|
||||||
{
|
blkCnt = numSamples >> 2U;
|
||||||
q31_t acc0, acc1; /* Accumulators */
|
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
{
|
||||||
uint32_t blkCnt; /* loop counter */
|
|
||||||
q31_t in1, in2, in3, in4;
|
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||||
q31_t acc2, acc3;
|
in1 = *__SIMD32(pSrc)++;
|
||||||
|
in2 = *__SIMD32(pSrc)++;
|
||||||
|
in3 = *__SIMD32(pSrc)++;
|
||||||
/*loop Unrolling */
|
in4 = *__SIMD32(pSrc)++;
|
||||||
blkCnt = numSamples >> 2u;
|
|
||||||
|
acc0 = __SMUAD(in1, in1);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
acc1 = __SMUAD(in2, in2);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
acc2 = __SMUAD(in3, in3);
|
||||||
while(blkCnt > 0u)
|
acc3 = __SMUAD(in4, in4);
|
||||||
{
|
|
||||||
|
/* store the result in 2.14 format in the destination buffer. */
|
||||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
arm_sqrt_q15((q15_t) ((acc0) >> 17), pDst++);
|
||||||
in1 = *__SIMD32(pSrc)++;
|
arm_sqrt_q15((q15_t) ((acc1) >> 17), pDst++);
|
||||||
in2 = *__SIMD32(pSrc)++;
|
arm_sqrt_q15((q15_t) ((acc2) >> 17), pDst++);
|
||||||
in3 = *__SIMD32(pSrc)++;
|
arm_sqrt_q15((q15_t) ((acc3) >> 17), pDst++);
|
||||||
in4 = *__SIMD32(pSrc)++;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
acc0 = __SMUAD(in1, in1);
|
blkCnt--;
|
||||||
acc1 = __SMUAD(in2, in2);
|
}
|
||||||
acc2 = __SMUAD(in3, in3);
|
|
||||||
acc3 = __SMUAD(in4, in4);
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
/* store the result in 2.14 format in the destination buffer. */
|
blkCnt = numSamples % 0x4U;
|
||||||
arm_sqrt_q15((q15_t) ((acc0) >> 17), pDst++);
|
|
||||||
arm_sqrt_q15((q15_t) ((acc1) >> 17), pDst++);
|
while (blkCnt > 0U)
|
||||||
arm_sqrt_q15((q15_t) ((acc2) >> 17), pDst++);
|
{
|
||||||
arm_sqrt_q15((q15_t) ((acc3) >> 17), pDst++);
|
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||||
|
in1 = *__SIMD32(pSrc)++;
|
||||||
/* Decrement the loop counter */
|
acc0 = __SMUAD(in1, in1);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* store the result in 2.14 format in the destination buffer. */
|
||||||
|
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
/* Decrement the loop counter */
|
||||||
blkCnt = numSamples % 0x4u;
|
blkCnt--;
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
#else
|
||||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
|
||||||
in1 = *__SIMD32(pSrc)++;
|
/* Run the below code for Cortex-M0 */
|
||||||
acc0 = __SMUAD(in1, in1);
|
q15_t real, imag; /* Temporary variables to hold input values */
|
||||||
|
|
||||||
/* store the result in 2.14 format in the destination buffer. */
|
while (numSamples > 0U)
|
||||||
arm_sqrt_q15((q15_t) (acc0 >> 17), pDst++);
|
{
|
||||||
|
/* out = sqrt(real * real + imag * imag) */
|
||||||
/* Decrement the loop counter */
|
real = *pSrc++;
|
||||||
blkCnt--;
|
imag = *pSrc++;
|
||||||
}
|
|
||||||
|
acc0 = (real * real);
|
||||||
#else
|
acc1 = (imag * imag);
|
||||||
|
|
||||||
/* Run the below code for Cortex-M0 */
|
/* store the result in 2.14 format in the destination buffer. */
|
||||||
q15_t real, imag; /* Temporary variables to hold input values */
|
arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
|
||||||
|
|
||||||
while(numSamples > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
numSamples--;
|
||||||
/* out = sqrt(real * real + imag * imag) */
|
}
|
||||||
real = *pSrc++;
|
|
||||||
imag = *pSrc++;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
acc0 = (real * real);
|
}
|
||||||
acc1 = (imag * imag);
|
|
||||||
|
/**
|
||||||
/* store the result in 2.14 format in the destination buffer. */
|
* @} end of cmplx_mag group
|
||||||
arm_sqrt_q15((q15_t) (((q63_t) acc0 + acc1) >> 17), pDst++);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
numSamples--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_mag group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,185 +1,173 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mag_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 complex magnitude
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mag_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 complex magnitude
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup cmplx_mag
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* ---------------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q31 complex magnitude
|
||||||
|
* @param *pSrc points to the complex input vector
|
||||||
/**
|
* @param *pDst points to the real output vector
|
||||||
* @ingroup groupCmplxMath
|
* @param numSamples number of complex samples in the input vector
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @addtogroup cmplx_mag
|
* \par
|
||||||
* @{
|
* The function implements 1.31 by 1.31 multiplications and finally output is converted into 2.30 format.
|
||||||
*/
|
* Input down scaling is not required.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @brief Q31 complex magnitude
|
void arm_cmplx_mag_q31(
|
||||||
* @param *pSrc points to the complex input vector
|
q31_t * pSrc,
|
||||||
* @param *pDst points to the real output vector
|
q31_t * pDst,
|
||||||
* @param numSamples number of complex samples in the input vector
|
uint32_t numSamples)
|
||||||
* @return none.
|
{
|
||||||
*
|
q31_t real, imag; /* Temporary variables to hold input values */
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q31_t acc0, acc1; /* Accumulators */
|
||||||
* \par
|
uint32_t blkCnt; /* loop counter */
|
||||||
* The function implements 1.31 by 1.31 multiplications and finally output is converted into 2.30 format.
|
|
||||||
* Input down scaling is not required.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
void arm_cmplx_mag_q31(
|
q31_t real1, real2, imag1, imag2; /* Temporary variables to hold input values */
|
||||||
q31_t * pSrc,
|
q31_t out1, out2, out3, out4; /* Accumulators */
|
||||||
q31_t * pDst,
|
q63_t mul1, mul2, mul3, mul4; /* Temporary variables */
|
||||||
uint32_t numSamples)
|
|
||||||
{
|
|
||||||
q31_t real, imag; /* Temporary variables to hold input values */
|
/*loop Unrolling */
|
||||||
q31_t acc0, acc1; /* Accumulators */
|
blkCnt = numSamples >> 2U;
|
||||||
uint32_t blkCnt; /* loop counter */
|
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
{
|
||||||
q31_t real1, real2, imag1, imag2; /* Temporary variables to hold input values */
|
/* read complex input from source buffer */
|
||||||
q31_t out1, out2, out3, out4; /* Accumulators */
|
real1 = pSrc[0];
|
||||||
q63_t mul1, mul2, mul3, mul4; /* Temporary variables */
|
imag1 = pSrc[1];
|
||||||
|
real2 = pSrc[2];
|
||||||
|
imag2 = pSrc[3];
|
||||||
/*loop Unrolling */
|
|
||||||
blkCnt = numSamples >> 2u;
|
/* calculate power of input values */
|
||||||
|
mul1 = (q63_t) real1 *real1;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
mul2 = (q63_t) imag1 *imag1;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
mul3 = (q63_t) real2 *real2;
|
||||||
while(blkCnt > 0u)
|
mul4 = (q63_t) imag2 *imag2;
|
||||||
{
|
|
||||||
/* read complex input from source buffer */
|
/* get the result to 3.29 format */
|
||||||
real1 = pSrc[0];
|
out1 = (q31_t) (mul1 >> 33);
|
||||||
imag1 = pSrc[1];
|
out2 = (q31_t) (mul2 >> 33);
|
||||||
real2 = pSrc[2];
|
out3 = (q31_t) (mul3 >> 33);
|
||||||
imag2 = pSrc[3];
|
out4 = (q31_t) (mul4 >> 33);
|
||||||
|
|
||||||
/* calculate power of input values */
|
/* add real and imaginary accumulators */
|
||||||
mul1 = (q63_t) real1 *real1;
|
out1 = out1 + out2;
|
||||||
mul2 = (q63_t) imag1 *imag1;
|
out3 = out3 + out4;
|
||||||
mul3 = (q63_t) real2 *real2;
|
|
||||||
mul4 = (q63_t) imag2 *imag2;
|
/* read complex input from source buffer */
|
||||||
|
real1 = pSrc[4];
|
||||||
/* get the result to 3.29 format */
|
imag1 = pSrc[5];
|
||||||
out1 = (q31_t) (mul1 >> 33);
|
real2 = pSrc[6];
|
||||||
out2 = (q31_t) (mul2 >> 33);
|
imag2 = pSrc[7];
|
||||||
out3 = (q31_t) (mul3 >> 33);
|
|
||||||
out4 = (q31_t) (mul4 >> 33);
|
/* calculate square root */
|
||||||
|
arm_sqrt_q31(out1, &pDst[0]);
|
||||||
/* add real and imaginary accumulators */
|
|
||||||
out1 = out1 + out2;
|
/* calculate power of input values */
|
||||||
out3 = out3 + out4;
|
mul1 = (q63_t) real1 *real1;
|
||||||
|
|
||||||
/* read complex input from source buffer */
|
/* calculate square root */
|
||||||
real1 = pSrc[4];
|
arm_sqrt_q31(out3, &pDst[1]);
|
||||||
imag1 = pSrc[5];
|
|
||||||
real2 = pSrc[6];
|
/* calculate power of input values */
|
||||||
imag2 = pSrc[7];
|
mul2 = (q63_t) imag1 *imag1;
|
||||||
|
mul3 = (q63_t) real2 *real2;
|
||||||
/* calculate square root */
|
mul4 = (q63_t) imag2 *imag2;
|
||||||
arm_sqrt_q31(out1, &pDst[0]);
|
|
||||||
|
/* get the result to 3.29 format */
|
||||||
/* calculate power of input values */
|
out1 = (q31_t) (mul1 >> 33);
|
||||||
mul1 = (q63_t) real1 *real1;
|
out2 = (q31_t) (mul2 >> 33);
|
||||||
|
out3 = (q31_t) (mul3 >> 33);
|
||||||
/* calculate square root */
|
out4 = (q31_t) (mul4 >> 33);
|
||||||
arm_sqrt_q31(out3, &pDst[1]);
|
|
||||||
|
/* add real and imaginary accumulators */
|
||||||
/* calculate power of input values */
|
out1 = out1 + out2;
|
||||||
mul2 = (q63_t) imag1 *imag1;
|
out3 = out3 + out4;
|
||||||
mul3 = (q63_t) real2 *real2;
|
|
||||||
mul4 = (q63_t) imag2 *imag2;
|
/* calculate square root */
|
||||||
|
arm_sqrt_q31(out1, &pDst[2]);
|
||||||
/* get the result to 3.29 format */
|
|
||||||
out1 = (q31_t) (mul1 >> 33);
|
/* increment source pointer by 8 (4 complex samples) to process next samples */
|
||||||
out2 = (q31_t) (mul2 >> 33);
|
pSrc += 8U;
|
||||||
out3 = (q31_t) (mul3 >> 33);
|
|
||||||
out4 = (q31_t) (mul4 >> 33);
|
/* calculate square root */
|
||||||
|
arm_sqrt_q31(out3, &pDst[3]);
|
||||||
/* add real and imaginary accumulators */
|
|
||||||
out1 = out1 + out2;
|
/* increment destination by 4 to process next samples */
|
||||||
out3 = out3 + out4;
|
pDst += 4U;
|
||||||
|
|
||||||
/* calculate square root */
|
/* Decrement the loop counter */
|
||||||
arm_sqrt_q31(out1, &pDst[2]);
|
blkCnt--;
|
||||||
|
}
|
||||||
/* increment source pointer by 8 (4 complex samples) to process next samples */
|
|
||||||
pSrc += 8u;
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
/* calculate square root */
|
blkCnt = numSamples % 0x4U;
|
||||||
arm_sqrt_q31(out3, &pDst[3]);
|
|
||||||
|
#else
|
||||||
/* increment destination by 4 to process next samples */
|
|
||||||
pDst += 4u;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
blkCnt = numSamples;
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
}
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
{
|
||||||
** No loop unrolling is used. */
|
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
||||||
blkCnt = numSamples % 0x4u;
|
real = *pSrc++;
|
||||||
|
imag = *pSrc++;
|
||||||
#else
|
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||||
|
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||||
/* Run the below code for Cortex-M0 */
|
/* store the result in 2.30 format in the destination buffer. */
|
||||||
blkCnt = numSamples;
|
arm_sqrt_q31(acc0 + acc1, pDst++);
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
}
|
||||||
/* C[0] = sqrt(A[0] * A[0] + A[1] * A[1]) */
|
|
||||||
real = *pSrc++;
|
/**
|
||||||
imag = *pSrc++;
|
* @} end of cmplx_mag group
|
||||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
*/
|
||||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
|
||||||
/* store the result in 2.30 format in the destination buffer. */
|
|
||||||
arm_sqrt_q31(acc0 + acc1, pDst++);
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_mag group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,215 +1,204 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mag_squared_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point complex magnitude squared
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mag_squared_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point complex magnitude squared.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup cmplx_mag_squared Complex Magnitude Squared
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Computes the magnitude squared of the elements of a complex data vector.
|
||||||
* ---------------------------------------------------------------------------- */
|
*
|
||||||
#include "arm_math.h"
|
* The <code>pSrc</code> points to the source data and
|
||||||
|
* <code>pDst</code> points to where the result should be written.
|
||||||
/**
|
* <code>numSamples</code> specifies the number of complex samples
|
||||||
* @ingroup groupCmplxMath
|
* in the input array and the data is stored in an interleaved fashion
|
||||||
*/
|
* (real, imag, real, imag, ...).
|
||||||
|
* The input array has a total of <code>2*numSamples</code> values;
|
||||||
/**
|
* the output array has a total of <code>numSamples</code> values.
|
||||||
* @defgroup cmplx_mag_squared Complex Magnitude Squared
|
*
|
||||||
*
|
* The following underlying algorithm is used:
|
||||||
* Computes the magnitude squared of the elements of a complex data vector.
|
*
|
||||||
*
|
* <pre>
|
||||||
* The <code>pSrc</code> points to the source data and
|
* for(n=0; n<numSamples; n++) {
|
||||||
* <code>pDst</code> points to where the result should be written.
|
* pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
|
||||||
* <code>numSamples</code> specifies the number of complex samples
|
* }
|
||||||
* in the input array and the data is stored in an interleaved fashion
|
* </pre>
|
||||||
* (real, imag, real, imag, ...).
|
*
|
||||||
* The input array has a total of <code>2*numSamples</code> values;
|
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||||
* the output array has a total of <code>numSamples</code> values.
|
*/
|
||||||
*
|
|
||||||
* The following underlying algorithm is used:
|
/**
|
||||||
*
|
* @addtogroup cmplx_mag_squared
|
||||||
* <pre>
|
* @{
|
||||||
* for(n=0; n<numSamples; n++) {
|
*/
|
||||||
* pDst[n] = pSrc[(2*n)+0]^2 + pSrc[(2*n)+1]^2;
|
|
||||||
* }
|
|
||||||
* </pre>
|
/**
|
||||||
*
|
* @brief Floating-point complex magnitude squared
|
||||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
* @param[in] *pSrc points to the complex input vector
|
||||||
*/
|
* @param[out] *pDst points to the real output vector
|
||||||
|
* @param[in] numSamples number of complex samples in the input vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup cmplx_mag_squared
|
*/
|
||||||
* @{
|
|
||||||
*/
|
void arm_cmplx_mag_squared_f32(
|
||||||
|
float32_t * pSrc,
|
||||||
|
float32_t * pDst,
|
||||||
/**
|
uint32_t numSamples)
|
||||||
* @brief Floating-point complex magnitude squared
|
{
|
||||||
* @param[in] *pSrc points to the complex input vector
|
float32_t real, imag; /* Temporary variables to store real and imaginary values */
|
||||||
* @param[out] *pDst points to the real output vector
|
uint32_t blkCnt; /* loop counter */
|
||||||
* @param[in] numSamples number of complex samples in the input vector
|
|
||||||
* @return none.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
float32_t real1, real2, real3, real4; /* Temporary variables to hold real values */
|
||||||
|
float32_t imag1, imag2, imag3, imag4; /* Temporary variables to hold imaginary values */
|
||||||
void arm_cmplx_mag_squared_f32(
|
float32_t mul1, mul2, mul3, mul4; /* Temporary variables */
|
||||||
float32_t * pSrc,
|
float32_t mul5, mul6, mul7, mul8; /* Temporary variables */
|
||||||
float32_t * pDst,
|
float32_t out1, out2, out3, out4; /* Temporary variables to hold output values */
|
||||||
uint32_t numSamples)
|
|
||||||
{
|
/*loop Unrolling */
|
||||||
float32_t real, imag; /* Temporary variables to store real and imaginary values */
|
blkCnt = numSamples >> 2U;
|
||||||
uint32_t blkCnt; /* loop counter */
|
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
float32_t real1, real2, real3, real4; /* Temporary variables to hold real values */
|
while (blkCnt > 0U)
|
||||||
float32_t imag1, imag2, imag3, imag4; /* Temporary variables to hold imaginary values */
|
{
|
||||||
float32_t mul1, mul2, mul3, mul4; /* Temporary variables */
|
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||||
float32_t mul5, mul6, mul7, mul8; /* Temporary variables */
|
/* read real input sample from source buffer */
|
||||||
float32_t out1, out2, out3, out4; /* Temporary variables to hold output values */
|
real1 = pSrc[0];
|
||||||
|
/* read imaginary input sample from source buffer */
|
||||||
/*loop Unrolling */
|
imag1 = pSrc[1];
|
||||||
blkCnt = numSamples >> 2u;
|
|
||||||
|
/* calculate power of real value */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
mul1 = real1 * real1;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
|
||||||
while(blkCnt > 0u)
|
/* read real input sample from source buffer */
|
||||||
{
|
real2 = pSrc[2];
|
||||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
|
||||||
/* read real input sample from source buffer */
|
/* calculate power of imaginary value */
|
||||||
real1 = pSrc[0];
|
mul2 = imag1 * imag1;
|
||||||
/* read imaginary input sample from source buffer */
|
|
||||||
imag1 = pSrc[1];
|
/* read imaginary input sample from source buffer */
|
||||||
|
imag2 = pSrc[3];
|
||||||
/* calculate power of real value */
|
|
||||||
mul1 = real1 * real1;
|
/* calculate power of real value */
|
||||||
|
mul3 = real2 * real2;
|
||||||
/* read real input sample from source buffer */
|
|
||||||
real2 = pSrc[2];
|
/* read real input sample from source buffer */
|
||||||
|
real3 = pSrc[4];
|
||||||
/* calculate power of imaginary value */
|
|
||||||
mul2 = imag1 * imag1;
|
/* calculate power of imaginary value */
|
||||||
|
mul4 = imag2 * imag2;
|
||||||
/* read imaginary input sample from source buffer */
|
|
||||||
imag2 = pSrc[3];
|
/* read imaginary input sample from source buffer */
|
||||||
|
imag3 = pSrc[5];
|
||||||
/* calculate power of real value */
|
|
||||||
mul3 = real2 * real2;
|
/* calculate power of real value */
|
||||||
|
mul5 = real3 * real3;
|
||||||
/* read real input sample from source buffer */
|
/* calculate power of imaginary value */
|
||||||
real3 = pSrc[4];
|
mul6 = imag3 * imag3;
|
||||||
|
|
||||||
/* calculate power of imaginary value */
|
/* read real input sample from source buffer */
|
||||||
mul4 = imag2 * imag2;
|
real4 = pSrc[6];
|
||||||
|
|
||||||
/* read imaginary input sample from source buffer */
|
/* accumulate real and imaginary powers */
|
||||||
imag3 = pSrc[5];
|
out1 = mul1 + mul2;
|
||||||
|
|
||||||
/* calculate power of real value */
|
/* read imaginary input sample from source buffer */
|
||||||
mul5 = real3 * real3;
|
imag4 = pSrc[7];
|
||||||
/* calculate power of imaginary value */
|
|
||||||
mul6 = imag3 * imag3;
|
/* accumulate real and imaginary powers */
|
||||||
|
out2 = mul3 + mul4;
|
||||||
/* read real input sample from source buffer */
|
|
||||||
real4 = pSrc[6];
|
/* calculate power of real value */
|
||||||
|
mul7 = real4 * real4;
|
||||||
/* accumulate real and imaginary powers */
|
/* calculate power of imaginary value */
|
||||||
out1 = mul1 + mul2;
|
mul8 = imag4 * imag4;
|
||||||
|
|
||||||
/* read imaginary input sample from source buffer */
|
/* store output to destination */
|
||||||
imag4 = pSrc[7];
|
pDst[0] = out1;
|
||||||
|
|
||||||
/* accumulate real and imaginary powers */
|
/* accumulate real and imaginary powers */
|
||||||
out2 = mul3 + mul4;
|
out3 = mul5 + mul6;
|
||||||
|
|
||||||
/* calculate power of real value */
|
/* store output to destination */
|
||||||
mul7 = real4 * real4;
|
pDst[1] = out2;
|
||||||
/* calculate power of imaginary value */
|
|
||||||
mul8 = imag4 * imag4;
|
/* accumulate real and imaginary powers */
|
||||||
|
out4 = mul7 + mul8;
|
||||||
/* store output to destination */
|
|
||||||
pDst[0] = out1;
|
/* store output to destination */
|
||||||
|
pDst[2] = out3;
|
||||||
/* accumulate real and imaginary powers */
|
|
||||||
out3 = mul5 + mul6;
|
/* increment source pointer by 8 (4 complex samples) to process next samples */
|
||||||
|
pSrc += 8U;
|
||||||
/* store output to destination */
|
|
||||||
pDst[1] = out2;
|
/* store output to destination */
|
||||||
|
pDst[3] = out4;
|
||||||
/* accumulate real and imaginary powers */
|
|
||||||
out4 = mul7 + mul8;
|
/* increment destination pointer by 4 to process next samples */
|
||||||
|
pDst += 4U;
|
||||||
/* store output to destination */
|
|
||||||
pDst[2] = out3;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* increment source pointer by 8 (4 complex samples) to process next samples */
|
}
|
||||||
pSrc += 8u;
|
|
||||||
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* store output to destination */
|
** No loop unrolling is used. */
|
||||||
pDst[3] = out4;
|
blkCnt = numSamples % 0x4U;
|
||||||
|
|
||||||
/* increment destination pointer by 4 to process next samples */
|
#else
|
||||||
pDst += 4u;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
blkCnt = numSamples;
|
||||||
}
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
while (blkCnt > 0U)
|
||||||
blkCnt = numSamples % 0x4u;
|
{
|
||||||
|
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||||
#else
|
real = *pSrc++;
|
||||||
|
imag = *pSrc++;
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* out = (real * real) + (imag * imag) */
|
||||||
blkCnt = numSamples;
|
/* store the result in the destination buffer. */
|
||||||
|
*pDst++ = (real * real) + (imag * imag);
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
while(blkCnt > 0u)
|
blkCnt--;
|
||||||
{
|
}
|
||||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
}
|
||||||
real = *pSrc++;
|
|
||||||
imag = *pSrc++;
|
/**
|
||||||
|
* @} end of cmplx_mag_squared group
|
||||||
/* out = (real * real) + (imag * imag) */
|
*/
|
||||||
/* store the result in the destination buffer. */
|
|
||||||
*pDst++ = (real * real) + (imag * imag);
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_mag_squared group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,148 +1,136 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mag_squared_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 complex magnitude squared
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mag_squared_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 complex magnitude squared.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup cmplx_mag_squared
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* ---------------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q15 complex magnitude squared
|
||||||
|
* @param *pSrc points to the complex input vector
|
||||||
/**
|
* @param *pDst points to the real output vector
|
||||||
* @ingroup groupCmplxMath
|
* @param numSamples number of complex samples in the input vector
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @addtogroup cmplx_mag_squared
|
* \par
|
||||||
* @{
|
* The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
void arm_cmplx_mag_squared_q15(
|
||||||
* @brief Q15 complex magnitude squared
|
q15_t * pSrc,
|
||||||
* @param *pSrc points to the complex input vector
|
q15_t * pDst,
|
||||||
* @param *pDst points to the real output vector
|
uint32_t numSamples)
|
||||||
* @param numSamples number of complex samples in the input vector
|
{
|
||||||
* @return none.
|
q31_t acc0, acc1; /* Accumulators */
|
||||||
*
|
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
#if defined (ARM_MATH_DSP)
|
||||||
* \par
|
|
||||||
* The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
*/
|
uint32_t blkCnt; /* loop counter */
|
||||||
|
q31_t in1, in2, in3, in4;
|
||||||
void arm_cmplx_mag_squared_q15(
|
q31_t acc2, acc3;
|
||||||
q15_t * pSrc,
|
|
||||||
q15_t * pDst,
|
/*loop Unrolling */
|
||||||
uint32_t numSamples)
|
blkCnt = numSamples >> 2U;
|
||||||
{
|
|
||||||
q31_t acc0, acc1; /* Accumulators */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||||
uint32_t blkCnt; /* loop counter */
|
in1 = *__SIMD32(pSrc)++;
|
||||||
q31_t in1, in2, in3, in4;
|
in2 = *__SIMD32(pSrc)++;
|
||||||
q31_t acc2, acc3;
|
in3 = *__SIMD32(pSrc)++;
|
||||||
|
in4 = *__SIMD32(pSrc)++;
|
||||||
/*loop Unrolling */
|
|
||||||
blkCnt = numSamples >> 2u;
|
acc0 = __SMUAD(in1, in1);
|
||||||
|
acc1 = __SMUAD(in2, in2);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
acc2 = __SMUAD(in3, in3);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
acc3 = __SMUAD(in4, in4);
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
*pDst++ = (q15_t) (acc0 >> 17);
|
||||||
in1 = *__SIMD32(pSrc)++;
|
*pDst++ = (q15_t) (acc1 >> 17);
|
||||||
in2 = *__SIMD32(pSrc)++;
|
*pDst++ = (q15_t) (acc2 >> 17);
|
||||||
in3 = *__SIMD32(pSrc)++;
|
*pDst++ = (q15_t) (acc3 >> 17);
|
||||||
in4 = *__SIMD32(pSrc)++;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
acc0 = __SMUAD(in1, in1);
|
blkCnt--;
|
||||||
acc1 = __SMUAD(in2, in2);
|
}
|
||||||
acc2 = __SMUAD(in3, in3);
|
|
||||||
acc3 = __SMUAD(in4, in4);
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
blkCnt = numSamples % 0x4U;
|
||||||
*pDst++ = (q15_t) (acc0 >> 17);
|
|
||||||
*pDst++ = (q15_t) (acc1 >> 17);
|
while (blkCnt > 0U)
|
||||||
*pDst++ = (q15_t) (acc2 >> 17);
|
{
|
||||||
*pDst++ = (q15_t) (acc3 >> 17);
|
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||||
|
in1 = *__SIMD32(pSrc)++;
|
||||||
/* Decrement the loop counter */
|
acc0 = __SMUAD(in1, in1);
|
||||||
blkCnt--;
|
|
||||||
}
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
|
*pDst++ = (q15_t) (acc0 >> 17);
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
/* Decrement the loop counter */
|
||||||
blkCnt = numSamples % 0x4u;
|
blkCnt--;
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
#else
|
||||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
|
||||||
in1 = *__SIMD32(pSrc)++;
|
/* Run the below code for Cortex-M0 */
|
||||||
acc0 = __SMUAD(in1, in1);
|
q15_t real, imag; /* Temporary variables to store real and imaginary values */
|
||||||
|
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
while (numSamples > 0U)
|
||||||
*pDst++ = (q15_t) (acc0 >> 17);
|
{
|
||||||
|
/* out = ((real * real) + (imag * imag)) */
|
||||||
/* Decrement the loop counter */
|
real = *pSrc++;
|
||||||
blkCnt--;
|
imag = *pSrc++;
|
||||||
}
|
acc0 = (real * real);
|
||||||
|
acc1 = (imag * imag);
|
||||||
#else
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
|
*pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
q15_t real, imag; /* Temporary variables to store real and imaginary values */
|
/* Decrement the loop counter */
|
||||||
|
numSamples--;
|
||||||
while(numSamples > 0u)
|
}
|
||||||
{
|
|
||||||
/* out = ((real * real) + (imag * imag)) */
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
real = *pSrc++;
|
|
||||||
imag = *pSrc++;
|
}
|
||||||
acc0 = (real * real);
|
|
||||||
acc1 = (imag * imag);
|
/**
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
* @} end of cmplx_mag_squared group
|
||||||
*pDst++ = (q15_t) (((q63_t) acc0 + acc1) >> 17);
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
numSamples--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_mag_squared group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,161 +1,149 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mag_squared_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 complex magnitude squared
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mag_squared_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 complex magnitude squared.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup cmplx_mag_squared
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* ---------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Q31 complex magnitude squared
|
||||||
/**
|
* @param *pSrc points to the complex input vector
|
||||||
* @ingroup groupCmplxMath
|
* @param *pDst points to the real output vector
|
||||||
*/
|
* @param numSamples number of complex samples in the input vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup cmplx_mag_squared
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
|
||||||
|
* Input down scaling is not required.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @brief Q31 complex magnitude squared
|
void arm_cmplx_mag_squared_q31(
|
||||||
* @param *pSrc points to the complex input vector
|
q31_t * pSrc,
|
||||||
* @param *pDst points to the real output vector
|
q31_t * pDst,
|
||||||
* @param numSamples number of complex samples in the input vector
|
uint32_t numSamples)
|
||||||
* @return none.
|
{
|
||||||
*
|
q31_t real, imag; /* Temporary variables to store real and imaginary values */
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q31_t acc0, acc1; /* Accumulators */
|
||||||
* \par
|
|
||||||
* The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Input down scaling is not required.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
uint32_t blkCnt; /* loop counter */
|
||||||
void arm_cmplx_mag_squared_q31(
|
|
||||||
q31_t * pSrc,
|
/* loop Unrolling */
|
||||||
q31_t * pDst,
|
blkCnt = numSamples >> 2U;
|
||||||
uint32_t numSamples)
|
|
||||||
{
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
q31_t real, imag; /* Temporary variables to store real and imaginary values */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
q31_t acc0, acc1; /* Accumulators */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||||
|
real = *pSrc++;
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
imag = *pSrc++;
|
||||||
uint32_t blkCnt; /* loop counter */
|
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||||
|
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||||
/* loop Unrolling */
|
/* store the result in 3.29 format in the destination buffer. */
|
||||||
blkCnt = numSamples >> 2u;
|
*pDst++ = acc0 + acc1;
|
||||||
|
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
real = *pSrc++;
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
imag = *pSrc++;
|
||||||
while(blkCnt > 0u)
|
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||||
{
|
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
/* store the result in 3.29 format in the destination buffer. */
|
||||||
real = *pSrc++;
|
*pDst++ = acc0 + acc1;
|
||||||
imag = *pSrc++;
|
|
||||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
real = *pSrc++;
|
||||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
imag = *pSrc++;
|
||||||
/* store the result in 3.29 format in the destination buffer. */
|
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||||
*pDst++ = acc0 + acc1;
|
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||||
|
/* store the result in 3.29 format in the destination buffer. */
|
||||||
real = *pSrc++;
|
*pDst++ = acc0 + acc1;
|
||||||
imag = *pSrc++;
|
|
||||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
real = *pSrc++;
|
||||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
imag = *pSrc++;
|
||||||
/* store the result in 3.29 format in the destination buffer. */
|
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||||
*pDst++ = acc0 + acc1;
|
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||||
|
/* store the result in 3.29 format in the destination buffer. */
|
||||||
real = *pSrc++;
|
*pDst++ = acc0 + acc1;
|
||||||
imag = *pSrc++;
|
|
||||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
/* Decrement the loop counter */
|
||||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
blkCnt--;
|
||||||
/* store the result in 3.29 format in the destination buffer. */
|
}
|
||||||
*pDst++ = acc0 + acc1;
|
|
||||||
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
real = *pSrc++;
|
** No loop unrolling is used. */
|
||||||
imag = *pSrc++;
|
blkCnt = numSamples % 0x4U;
|
||||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
|
||||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
while (blkCnt > 0U)
|
||||||
/* store the result in 3.29 format in the destination buffer. */
|
{
|
||||||
*pDst++ = acc0 + acc1;
|
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
||||||
|
real = *pSrc++;
|
||||||
/* Decrement the loop counter */
|
imag = *pSrc++;
|
||||||
blkCnt--;
|
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||||
}
|
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||||
|
/* store the result in 3.29 format in the destination buffer. */
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
*pDst++ = acc0 + acc1;
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = numSamples % 0x4u;
|
/* Decrement the loop counter */
|
||||||
|
blkCnt--;
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C[0] = (A[0] * A[0] + A[1] * A[1]) */
|
#else
|
||||||
real = *pSrc++;
|
|
||||||
imag = *pSrc++;
|
/* Run the below code for Cortex-M0 */
|
||||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
|
||||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
while (numSamples > 0U)
|
||||||
/* store the result in 3.29 format in the destination buffer. */
|
{
|
||||||
*pDst++ = acc0 + acc1;
|
/* out = ((real * real) + (imag * imag)) */
|
||||||
|
real = *pSrc++;
|
||||||
/* Decrement the loop counter */
|
imag = *pSrc++;
|
||||||
blkCnt--;
|
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
||||||
}
|
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
||||||
|
/* store the result in 3.29 format in the destination buffer. */
|
||||||
#else
|
*pDst++ = acc0 + acc1;
|
||||||
|
|
||||||
/* Run the below code for Cortex-M0 */
|
/* Decrement the loop counter */
|
||||||
|
numSamples--;
|
||||||
while(numSamples > 0u)
|
}
|
||||||
{
|
|
||||||
/* out = ((real * real) + (imag * imag)) */
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
real = *pSrc++;
|
|
||||||
imag = *pSrc++;
|
}
|
||||||
acc0 = (q31_t) (((q63_t) real * real) >> 33);
|
|
||||||
acc1 = (q31_t) (((q63_t) imag * imag) >> 33);
|
/**
|
||||||
/* store the result in 3.29 format in the destination buffer. */
|
* @} end of cmplx_mag_squared group
|
||||||
*pDst++ = acc0 + acc1;
|
*/
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
numSamples--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cmplx_mag_squared group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,207 +1,196 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mult_cmplx_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point complex-by-complex multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mult_cmplx_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point complex-by-complex multiplication
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Multiplies a complex vector by another complex vector and generates a complex result.
|
||||||
* -------------------------------------------------------------------- */
|
* The data in the complex arrays is stored in an interleaved fashion
|
||||||
#include "arm_math.h"
|
* (real, imag, real, imag, ...).
|
||||||
|
* The parameter <code>numSamples</code> represents the number of complex
|
||||||
/**
|
* samples processed. The complex arrays have a total of <code>2*numSamples</code>
|
||||||
* @ingroup groupCmplxMath
|
* real values.
|
||||||
*/
|
*
|
||||||
|
* The underlying algorithm is used:
|
||||||
/**
|
*
|
||||||
* @defgroup CmplxByCmplxMult Complex-by-Complex Multiplication
|
* <pre>
|
||||||
*
|
* for(n=0; n<numSamples; n++) {
|
||||||
* Multiplies a complex vector by another complex vector and generates a complex result.
|
* pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
|
||||||
* The data in the complex arrays is stored in an interleaved fashion
|
* pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
|
||||||
* (real, imag, real, imag, ...).
|
* }
|
||||||
* The parameter <code>numSamples</code> represents the number of complex
|
* </pre>
|
||||||
* samples processed. The complex arrays have a total of <code>2*numSamples</code>
|
*
|
||||||
* real values.
|
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||||
*
|
*/
|
||||||
* The underlying algorithm is used:
|
|
||||||
*
|
/**
|
||||||
* <pre>
|
* @addtogroup CmplxByCmplxMult
|
||||||
* for(n=0; n<numSamples; n++) {
|
* @{
|
||||||
* pDst[(2*n)+0] = pSrcA[(2*n)+0] * pSrcB[(2*n)+0] - pSrcA[(2*n)+1] * pSrcB[(2*n)+1];
|
*/
|
||||||
* pDst[(2*n)+1] = pSrcA[(2*n)+0] * pSrcB[(2*n)+1] + pSrcA[(2*n)+1] * pSrcB[(2*n)+0];
|
|
||||||
* }
|
|
||||||
* </pre>
|
/**
|
||||||
*
|
* @brief Floating-point complex-by-complex multiplication
|
||||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
* @param[in] *pSrcA points to the first input vector
|
||||||
*/
|
* @param[in] *pSrcB points to the second input vector
|
||||||
|
* @param[out] *pDst points to the output vector
|
||||||
/**
|
* @param[in] numSamples number of complex samples in each vector
|
||||||
* @addtogroup CmplxByCmplxMult
|
* @return none.
|
||||||
* @{
|
*/
|
||||||
*/
|
|
||||||
|
void arm_cmplx_mult_cmplx_f32(
|
||||||
|
float32_t * pSrcA,
|
||||||
/**
|
float32_t * pSrcB,
|
||||||
* @brief Floating-point complex-by-complex multiplication
|
float32_t * pDst,
|
||||||
* @param[in] *pSrcA points to the first input vector
|
uint32_t numSamples)
|
||||||
* @param[in] *pSrcB points to the second input vector
|
{
|
||||||
* @param[out] *pDst points to the output vector
|
float32_t a1, b1, c1, d1; /* Temporary variables to store real and imaginary values */
|
||||||
* @param[in] numSamples number of complex samples in each vector
|
uint32_t blkCnt; /* loop counters */
|
||||||
* @return none.
|
|
||||||
*/
|
#if defined (ARM_MATH_DSP)
|
||||||
|
|
||||||
void arm_cmplx_mult_cmplx_f32(
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
float32_t * pSrcA,
|
float32_t a2, b2, c2, d2; /* Temporary variables to store real and imaginary values */
|
||||||
float32_t * pSrcB,
|
float32_t acc1, acc2, acc3, acc4;
|
||||||
float32_t * pDst,
|
|
||||||
uint32_t numSamples)
|
|
||||||
{
|
/* loop Unrolling */
|
||||||
float32_t a1, b1, c1, d1; /* Temporary variables to store real and imaginary values */
|
blkCnt = numSamples >> 2U;
|
||||||
uint32_t blkCnt; /* loop counters */
|
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
{
|
||||||
float32_t a2, b2, c2, d2; /* Temporary variables to store real and imaginary values */
|
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||||
float32_t acc1, acc2, acc3, acc4;
|
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||||
|
a1 = *pSrcA; /* A[2 * i] */
|
||||||
|
c1 = *pSrcB; /* B[2 * i] */
|
||||||
/* loop Unrolling */
|
|
||||||
blkCnt = numSamples >> 2u;
|
b1 = *(pSrcA + 1); /* A[2 * i + 1] */
|
||||||
|
acc1 = a1 * c1; /* acc1 = A[2 * i] * B[2 * i] */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
a2 = *(pSrcA + 2); /* A[2 * i + 2] */
|
||||||
while(blkCnt > 0u)
|
acc2 = (b1 * c1); /* acc2 = A[2 * i + 1] * B[2 * i] */
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
d1 = *(pSrcB + 1); /* B[2 * i + 1] */
|
||||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
c2 = *(pSrcB + 2); /* B[2 * i + 2] */
|
||||||
a1 = *pSrcA; /* A[2 * i] */
|
acc1 -= b1 * d1; /* acc1 = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
|
||||||
c1 = *pSrcB; /* B[2 * i] */
|
|
||||||
|
d2 = *(pSrcB + 3); /* B[2 * i + 3] */
|
||||||
b1 = *(pSrcA + 1); /* A[2 * i + 1] */
|
acc3 = a2 * c2; /* acc3 = A[2 * i + 2] * B[2 * i + 2] */
|
||||||
acc1 = a1 * c1; /* acc1 = A[2 * i] * B[2 * i] */
|
|
||||||
|
b2 = *(pSrcA + 3); /* A[2 * i + 3] */
|
||||||
a2 = *(pSrcA + 2); /* A[2 * i + 2] */
|
acc2 += (a1 * d1); /* acc2 = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
|
||||||
acc2 = (b1 * c1); /* acc2 = A[2 * i + 1] * B[2 * i] */
|
|
||||||
|
a1 = *(pSrcA + 4); /* A[2 * i + 4] */
|
||||||
d1 = *(pSrcB + 1); /* B[2 * i + 1] */
|
acc4 = (a2 * d2); /* acc4 = A[2 * i + 2] * B[2 * i + 3] */
|
||||||
c2 = *(pSrcB + 2); /* B[2 * i + 2] */
|
|
||||||
acc1 -= b1 * d1; /* acc1 = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
|
c1 = *(pSrcB + 4); /* B[2 * i + 4] */
|
||||||
|
acc3 -= (b2 * d2); /* acc3 = A[2 * i + 2] * B[2 * i + 2] - A[2 * i + 3] * B[2 * i + 3] */
|
||||||
d2 = *(pSrcB + 3); /* B[2 * i + 3] */
|
*pDst = acc1; /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
|
||||||
acc3 = a2 * c2; /* acc3 = A[2 * i + 2] * B[2 * i + 2] */
|
|
||||||
|
b1 = *(pSrcA + 5); /* A[2 * i + 5] */
|
||||||
b2 = *(pSrcA + 3); /* A[2 * i + 3] */
|
acc4 += b2 * c2; /* acc4 = A[2 * i + 2] * B[2 * i + 3] + A[2 * i + 3] * B[2 * i + 2] */
|
||||||
acc2 += (a1 * d1); /* acc2 = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
|
|
||||||
|
*(pDst + 1) = acc2; /* C[2 * i + 1] = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
|
||||||
a1 = *(pSrcA + 4); /* A[2 * i + 4] */
|
acc1 = (a1 * c1);
|
||||||
acc4 = (a2 * d2); /* acc4 = A[2 * i + 2] * B[2 * i + 3] */
|
|
||||||
|
d1 = *(pSrcB + 5);
|
||||||
c1 = *(pSrcB + 4); /* B[2 * i + 4] */
|
acc2 = (b1 * c1);
|
||||||
acc3 -= (b2 * d2); /* acc3 = A[2 * i + 2] * B[2 * i + 2] - A[2 * i + 3] * B[2 * i + 3] */
|
|
||||||
*pDst = acc1; /* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1] */
|
*(pDst + 2) = acc3;
|
||||||
|
*(pDst + 3) = acc4;
|
||||||
b1 = *(pSrcA + 5); /* A[2 * i + 5] */
|
|
||||||
acc4 += b2 * c2; /* acc4 = A[2 * i + 2] * B[2 * i + 3] + A[2 * i + 3] * B[2 * i + 2] */
|
a2 = *(pSrcA + 6);
|
||||||
|
acc1 -= (b1 * d1);
|
||||||
*(pDst + 1) = acc2; /* C[2 * i + 1] = A[2 * i + 1] * B[2 * i] + A[2 * i] * B[2 * i + 1] */
|
|
||||||
acc1 = (a1 * c1);
|
c2 = *(pSrcB + 6);
|
||||||
|
acc2 += (a1 * d1);
|
||||||
d1 = *(pSrcB + 5);
|
|
||||||
acc2 = (b1 * c1);
|
b2 = *(pSrcA + 7);
|
||||||
|
acc3 = (a2 * c2);
|
||||||
*(pDst + 2) = acc3;
|
|
||||||
*(pDst + 3) = acc4;
|
d2 = *(pSrcB + 7);
|
||||||
|
acc4 = (b2 * c2);
|
||||||
a2 = *(pSrcA + 6);
|
|
||||||
acc1 -= (b1 * d1);
|
*(pDst + 4) = acc1;
|
||||||
|
pSrcA += 8U;
|
||||||
c2 = *(pSrcB + 6);
|
|
||||||
acc2 += (a1 * d1);
|
acc3 -= (b2 * d2);
|
||||||
|
acc4 += (a2 * d2);
|
||||||
b2 = *(pSrcA + 7);
|
|
||||||
acc3 = (a2 * c2);
|
*(pDst + 5) = acc2;
|
||||||
|
pSrcB += 8U;
|
||||||
d2 = *(pSrcB + 7);
|
|
||||||
acc4 = (b2 * c2);
|
*(pDst + 6) = acc3;
|
||||||
|
*(pDst + 7) = acc4;
|
||||||
*(pDst + 4) = acc1;
|
|
||||||
pSrcA += 8u;
|
pDst += 8U;
|
||||||
|
|
||||||
acc3 -= (b2 * d2);
|
/* Decrement the numSamples loop counter */
|
||||||
acc4 += (a2 * d2);
|
blkCnt--;
|
||||||
|
}
|
||||||
*(pDst + 5) = acc2;
|
|
||||||
pSrcB += 8u;
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
*(pDst + 6) = acc3;
|
blkCnt = numSamples % 0x4U;
|
||||||
*(pDst + 7) = acc4;
|
|
||||||
|
#else
|
||||||
pDst += 8u;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
/* Decrement the numSamples loop counter */
|
blkCnt = numSamples;
|
||||||
blkCnt--;
|
|
||||||
}
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
while (blkCnt > 0U)
|
||||||
** No loop unrolling is used. */
|
{
|
||||||
blkCnt = numSamples % 0x4u;
|
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||||
|
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||||
#else
|
a1 = *pSrcA++;
|
||||||
|
b1 = *pSrcA++;
|
||||||
/* Run the below code for Cortex-M0 */
|
c1 = *pSrcB++;
|
||||||
blkCnt = numSamples;
|
d1 = *pSrcB++;
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
/* store the result in the destination buffer. */
|
||||||
|
*pDst++ = (a1 * c1) - (b1 * d1);
|
||||||
while(blkCnt > 0u)
|
*pDst++ = (a1 * d1) + (b1 * c1);
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
/* Decrement the numSamples loop counter */
|
||||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
blkCnt--;
|
||||||
a1 = *pSrcA++;
|
}
|
||||||
b1 = *pSrcA++;
|
}
|
||||||
c1 = *pSrcB++;
|
|
||||||
d1 = *pSrcB++;
|
/**
|
||||||
|
* @} end of CmplxByCmplxMult group
|
||||||
/* store the result in the destination buffer. */
|
*/
|
||||||
*pDst++ = (a1 * c1) - (b1 * d1);
|
|
||||||
*pDst++ = (a1 * d1) + (b1 * c1);
|
|
||||||
|
|
||||||
/* Decrement the numSamples loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of CmplxByCmplxMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,193 +1,181 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mult_cmplx_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 complex-by-complex multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mult_cmplx_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 complex-by-complex multiplication
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup CmplxByCmplxMult
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Q15 complex-by-complex multiplication
|
||||||
|
* @param[in] *pSrcA points to the first input vector
|
||||||
/**
|
* @param[in] *pSrcB points to the second input vector
|
||||||
* @ingroup groupCmplxMath
|
* @param[out] *pDst points to the output vector
|
||||||
*/
|
* @param[in] numSamples number of complex samples in each vector
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup CmplxByCmplxMult
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @brief Q15 complex-by-complex multiplication
|
void arm_cmplx_mult_cmplx_q15(
|
||||||
* @param[in] *pSrcA points to the first input vector
|
q15_t * pSrcA,
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q15_t * pSrcB,
|
||||||
* @param[out] *pDst points to the output vector
|
q15_t * pDst,
|
||||||
* @param[in] numSamples number of complex samples in each vector
|
uint32_t numSamples)
|
||||||
* @return none.
|
{
|
||||||
*
|
q15_t a, b, c, d; /* Temporary variables to store real and imaginary values */
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
|
||||||
* \par
|
#if defined (ARM_MATH_DSP)
|
||||||
* The function implements 1.15 by 1.15 multiplications and finally output is converted into 3.13 format.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
uint32_t blkCnt; /* loop counters */
|
||||||
void arm_cmplx_mult_cmplx_q15(
|
|
||||||
q15_t * pSrcA,
|
/* loop Unrolling */
|
||||||
q15_t * pSrcB,
|
blkCnt = numSamples >> 2U;
|
||||||
q15_t * pDst,
|
|
||||||
uint32_t numSamples)
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
{
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
q15_t a, b, c, d; /* Temporary variables to store real and imaginary values */
|
while (blkCnt > 0U)
|
||||||
|
{
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||||
|
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
a = *pSrcA++;
|
||||||
uint32_t blkCnt; /* loop counters */
|
b = *pSrcA++;
|
||||||
|
c = *pSrcB++;
|
||||||
/* loop Unrolling */
|
d = *pSrcB++;
|
||||||
blkCnt = numSamples >> 2u;
|
|
||||||
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
*pDst++ =
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||||
while(blkCnt > 0u)
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
{
|
*pDst++ =
|
||||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
|
||||||
a = *pSrcA++;
|
a = *pSrcA++;
|
||||||
b = *pSrcA++;
|
b = *pSrcA++;
|
||||||
c = *pSrcB++;
|
c = *pSrcB++;
|
||||||
d = *pSrcB++;
|
d = *pSrcB++;
|
||||||
|
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
*pDst++ =
|
*pDst++ =
|
||||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
*pDst++ =
|
*pDst++ =
|
||||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||||
|
|
||||||
a = *pSrcA++;
|
a = *pSrcA++;
|
||||||
b = *pSrcA++;
|
b = *pSrcA++;
|
||||||
c = *pSrcB++;
|
c = *pSrcB++;
|
||||||
d = *pSrcB++;
|
d = *pSrcB++;
|
||||||
|
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
*pDst++ =
|
*pDst++ =
|
||||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
*pDst++ =
|
*pDst++ =
|
||||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||||
|
|
||||||
a = *pSrcA++;
|
a = *pSrcA++;
|
||||||
b = *pSrcA++;
|
b = *pSrcA++;
|
||||||
c = *pSrcB++;
|
c = *pSrcB++;
|
||||||
d = *pSrcB++;
|
d = *pSrcB++;
|
||||||
|
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
*pDst++ =
|
*pDst++ =
|
||||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
*pDst++ =
|
*pDst++ =
|
||||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||||
|
|
||||||
a = *pSrcA++;
|
/* Decrement the blockSize loop counter */
|
||||||
b = *pSrcA++;
|
blkCnt--;
|
||||||
c = *pSrcB++;
|
}
|
||||||
d = *pSrcB++;
|
|
||||||
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
** No loop unrolling is used. */
|
||||||
*pDst++ =
|
blkCnt = numSamples % 0x4U;
|
||||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
while (blkCnt > 0U)
|
||||||
*pDst++ =
|
{
|
||||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||||
|
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||||
/* Decrement the blockSize loop counter */
|
a = *pSrcA++;
|
||||||
blkCnt--;
|
b = *pSrcA++;
|
||||||
}
|
c = *pSrcB++;
|
||||||
|
d = *pSrcB++;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
blkCnt = numSamples % 0x4u;
|
*pDst++ =
|
||||||
|
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||||
while(blkCnt > 0u)
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
{
|
*pDst++ =
|
||||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
|
||||||
a = *pSrcA++;
|
/* Decrement the blockSize loop counter */
|
||||||
b = *pSrcA++;
|
blkCnt--;
|
||||||
c = *pSrcB++;
|
}
|
||||||
d = *pSrcB++;
|
|
||||||
|
#else
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
|
||||||
*pDst++ =
|
/* Run the below code for Cortex-M0 */
|
||||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
while (numSamples > 0U)
|
||||||
*pDst++ =
|
{
|
||||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||||
|
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||||
/* Decrement the blockSize loop counter */
|
a = *pSrcA++;
|
||||||
blkCnt--;
|
b = *pSrcA++;
|
||||||
}
|
c = *pSrcB++;
|
||||||
|
d = *pSrcB++;
|
||||||
#else
|
|
||||||
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
/* Run the below code for Cortex-M0 */
|
*pDst++ =
|
||||||
|
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
||||||
while(numSamples > 0u)
|
/* store the result in 3.13 format in the destination buffer. */
|
||||||
{
|
*pDst++ =
|
||||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
||||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
|
||||||
a = *pSrcA++;
|
/* Decrement the blockSize loop counter */
|
||||||
b = *pSrcA++;
|
numSamples--;
|
||||||
c = *pSrcB++;
|
}
|
||||||
d = *pSrcB++;
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
|
||||||
*pDst++ =
|
}
|
||||||
(q15_t) (q31_t) (((q31_t) a * c) >> 17) - (((q31_t) b * d) >> 17);
|
|
||||||
/* store the result in 3.13 format in the destination buffer. */
|
/**
|
||||||
*pDst++ =
|
* @} end of CmplxByCmplxMult group
|
||||||
(q15_t) (q31_t) (((q31_t) a * d) >> 17) + (((q31_t) b * c) >> 17);
|
*/
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
|
||||||
numSamples--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of CmplxByCmplxMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,326 +1,314 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mult_cmplx_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 complex-by-complex multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mult_cmplx_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 complex-by-complex multiplication
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup CmplxByCmplxMult
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Q31 complex-by-complex multiplication
|
||||||
/**
|
* @param[in] *pSrcA points to the first input vector
|
||||||
* @ingroup groupCmplxMath
|
* @param[in] *pSrcB points to the second input vector
|
||||||
*/
|
* @param[out] *pDst points to the output vector
|
||||||
|
* @param[in] numSamples number of complex samples in each vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup CmplxByCmplxMult
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
|
||||||
|
* Input down scaling is not required.
|
||||||
/**
|
*/
|
||||||
* @brief Q31 complex-by-complex multiplication
|
|
||||||
* @param[in] *pSrcA points to the first input vector
|
void arm_cmplx_mult_cmplx_q31(
|
||||||
* @param[in] *pSrcB points to the second input vector
|
q31_t * pSrcA,
|
||||||
* @param[out] *pDst points to the output vector
|
q31_t * pSrcB,
|
||||||
* @param[in] numSamples number of complex samples in each vector
|
q31_t * pDst,
|
||||||
* @return none.
|
uint32_t numSamples)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */
|
||||||
* \par
|
uint32_t blkCnt; /* loop counters */
|
||||||
* The function implements 1.31 by 1.31 multiplications and finally output is converted into 3.29 format.
|
q31_t mul1, mul2, mul3, mul4;
|
||||||
* Input down scaling is not required.
|
q31_t out1, out2;
|
||||||
*/
|
|
||||||
|
#if defined (ARM_MATH_DSP)
|
||||||
void arm_cmplx_mult_cmplx_q31(
|
|
||||||
q31_t * pSrcA,
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
q31_t * pSrcB,
|
|
||||||
q31_t * pDst,
|
/* loop Unrolling */
|
||||||
uint32_t numSamples)
|
blkCnt = numSamples >> 2U;
|
||||||
{
|
|
||||||
q31_t a, b, c, d; /* Temporary variables to store real and imaginary values */
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
uint32_t blkCnt; /* loop counters */
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
q31_t mul1, mul2, mul3, mul4;
|
while (blkCnt > 0U)
|
||||||
q31_t out1, out2;
|
{
|
||||||
|
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||||
|
a = *pSrcA++;
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
b = *pSrcA++;
|
||||||
|
c = *pSrcB++;
|
||||||
/* loop Unrolling */
|
d = *pSrcB++;
|
||||||
blkCnt = numSamples >> 2u;
|
|
||||||
|
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||||
while(blkCnt > 0u)
|
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
mul1 = (mul1 >> 1);
|
||||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
mul2 = (mul2 >> 1);
|
||||||
a = *pSrcA++;
|
mul3 = (mul3 >> 1);
|
||||||
b = *pSrcA++;
|
mul4 = (mul4 >> 1);
|
||||||
c = *pSrcB++;
|
|
||||||
d = *pSrcB++;
|
out1 = mul1 - mul2;
|
||||||
|
out2 = mul3 + mul4;
|
||||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
|
||||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
/* store the real result in 3.29 format in the destination buffer. */
|
||||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
*pDst++ = out1;
|
||||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
/* store the imag result in 3.29 format in the destination buffer. */
|
||||||
|
*pDst++ = out2;
|
||||||
mul1 = (mul1 >> 1);
|
|
||||||
mul2 = (mul2 >> 1);
|
a = *pSrcA++;
|
||||||
mul3 = (mul3 >> 1);
|
b = *pSrcA++;
|
||||||
mul4 = (mul4 >> 1);
|
c = *pSrcB++;
|
||||||
|
d = *pSrcB++;
|
||||||
out1 = mul1 - mul2;
|
|
||||||
out2 = mul3 + mul4;
|
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||||
|
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||||
/* store the real result in 3.29 format in the destination buffer. */
|
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||||
*pDst++ = out1;
|
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||||
/* store the imag result in 3.29 format in the destination buffer. */
|
|
||||||
*pDst++ = out2;
|
mul1 = (mul1 >> 1);
|
||||||
|
mul2 = (mul2 >> 1);
|
||||||
a = *pSrcA++;
|
mul3 = (mul3 >> 1);
|
||||||
b = *pSrcA++;
|
mul4 = (mul4 >> 1);
|
||||||
c = *pSrcB++;
|
|
||||||
d = *pSrcB++;
|
out1 = mul1 - mul2;
|
||||||
|
out2 = mul3 + mul4;
|
||||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
|
||||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
/* store the real result in 3.29 format in the destination buffer. */
|
||||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
*pDst++ = out1;
|
||||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
/* store the imag result in 3.29 format in the destination buffer. */
|
||||||
|
*pDst++ = out2;
|
||||||
mul1 = (mul1 >> 1);
|
|
||||||
mul2 = (mul2 >> 1);
|
a = *pSrcA++;
|
||||||
mul3 = (mul3 >> 1);
|
b = *pSrcA++;
|
||||||
mul4 = (mul4 >> 1);
|
c = *pSrcB++;
|
||||||
|
d = *pSrcB++;
|
||||||
out1 = mul1 - mul2;
|
|
||||||
out2 = mul3 + mul4;
|
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||||
|
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||||
/* store the real result in 3.29 format in the destination buffer. */
|
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||||
*pDst++ = out1;
|
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||||
/* store the imag result in 3.29 format in the destination buffer. */
|
|
||||||
*pDst++ = out2;
|
mul1 = (mul1 >> 1);
|
||||||
|
mul2 = (mul2 >> 1);
|
||||||
a = *pSrcA++;
|
mul3 = (mul3 >> 1);
|
||||||
b = *pSrcA++;
|
mul4 = (mul4 >> 1);
|
||||||
c = *pSrcB++;
|
|
||||||
d = *pSrcB++;
|
out1 = mul1 - mul2;
|
||||||
|
out2 = mul3 + mul4;
|
||||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
|
||||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
/* store the real result in 3.29 format in the destination buffer. */
|
||||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
*pDst++ = out1;
|
||||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
/* store the imag result in 3.29 format in the destination buffer. */
|
||||||
|
*pDst++ = out2;
|
||||||
mul1 = (mul1 >> 1);
|
|
||||||
mul2 = (mul2 >> 1);
|
a = *pSrcA++;
|
||||||
mul3 = (mul3 >> 1);
|
b = *pSrcA++;
|
||||||
mul4 = (mul4 >> 1);
|
c = *pSrcB++;
|
||||||
|
d = *pSrcB++;
|
||||||
out1 = mul1 - mul2;
|
|
||||||
out2 = mul3 + mul4;
|
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||||
|
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||||
/* store the real result in 3.29 format in the destination buffer. */
|
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||||
*pDst++ = out1;
|
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||||
/* store the imag result in 3.29 format in the destination buffer. */
|
|
||||||
*pDst++ = out2;
|
mul1 = (mul1 >> 1);
|
||||||
|
mul2 = (mul2 >> 1);
|
||||||
a = *pSrcA++;
|
mul3 = (mul3 >> 1);
|
||||||
b = *pSrcA++;
|
mul4 = (mul4 >> 1);
|
||||||
c = *pSrcB++;
|
|
||||||
d = *pSrcB++;
|
out1 = mul1 - mul2;
|
||||||
|
out2 = mul3 + mul4;
|
||||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
|
||||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
/* store the real result in 3.29 format in the destination buffer. */
|
||||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
*pDst++ = out1;
|
||||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
/* store the imag result in 3.29 format in the destination buffer. */
|
||||||
|
*pDst++ = out2;
|
||||||
mul1 = (mul1 >> 1);
|
|
||||||
mul2 = (mul2 >> 1);
|
/* Decrement the blockSize loop counter */
|
||||||
mul3 = (mul3 >> 1);
|
blkCnt--;
|
||||||
mul4 = (mul4 >> 1);
|
}
|
||||||
|
|
||||||
out1 = mul1 - mul2;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
out2 = mul3 + mul4;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = numSamples % 0x4U;
|
||||||
/* store the real result in 3.29 format in the destination buffer. */
|
|
||||||
*pDst++ = out1;
|
while (blkCnt > 0U)
|
||||||
/* store the imag result in 3.29 format in the destination buffer. */
|
{
|
||||||
*pDst++ = out2;
|
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||||
|
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||||
/* Decrement the blockSize loop counter */
|
a = *pSrcA++;
|
||||||
blkCnt--;
|
b = *pSrcA++;
|
||||||
}
|
c = *pSrcB++;
|
||||||
|
d = *pSrcB++;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||||
blkCnt = numSamples % 0x4u;
|
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||||
|
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||||
while(blkCnt > 0u)
|
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
mul1 = (mul1 >> 1);
|
||||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
mul2 = (mul2 >> 1);
|
||||||
a = *pSrcA++;
|
mul3 = (mul3 >> 1);
|
||||||
b = *pSrcA++;
|
mul4 = (mul4 >> 1);
|
||||||
c = *pSrcB++;
|
|
||||||
d = *pSrcB++;
|
out1 = mul1 - mul2;
|
||||||
|
out2 = mul3 + mul4;
|
||||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
|
||||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
/* store the real result in 3.29 format in the destination buffer. */
|
||||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
*pDst++ = out1;
|
||||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
/* store the imag result in 3.29 format in the destination buffer. */
|
||||||
|
*pDst++ = out2;
|
||||||
mul1 = (mul1 >> 1);
|
|
||||||
mul2 = (mul2 >> 1);
|
/* Decrement the blockSize loop counter */
|
||||||
mul3 = (mul3 >> 1);
|
blkCnt--;
|
||||||
mul4 = (mul4 >> 1);
|
}
|
||||||
|
|
||||||
out1 = mul1 - mul2;
|
#else
|
||||||
out2 = mul3 + mul4;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
/* store the real result in 3.29 format in the destination buffer. */
|
|
||||||
*pDst++ = out1;
|
/* loop Unrolling */
|
||||||
/* store the imag result in 3.29 format in the destination buffer. */
|
blkCnt = numSamples >> 1U;
|
||||||
*pDst++ = out2;
|
|
||||||
|
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
|
||||||
/* Decrement the blockSize loop counter */
|
** a second loop below computes the remaining 1 sample. */
|
||||||
blkCnt--;
|
while (blkCnt > 0U)
|
||||||
}
|
{
|
||||||
|
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||||
#else
|
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||||
|
a = *pSrcA++;
|
||||||
/* Run the below code for Cortex-M0 */
|
b = *pSrcA++;
|
||||||
|
c = *pSrcB++;
|
||||||
/* loop Unrolling */
|
d = *pSrcB++;
|
||||||
blkCnt = numSamples >> 1u;
|
|
||||||
|
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||||
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
|
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||||
** a second loop below computes the remaining 1 sample. */
|
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||||
while(blkCnt > 0u)
|
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
mul1 = (mul1 >> 1);
|
||||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
mul2 = (mul2 >> 1);
|
||||||
a = *pSrcA++;
|
mul3 = (mul3 >> 1);
|
||||||
b = *pSrcA++;
|
mul4 = (mul4 >> 1);
|
||||||
c = *pSrcB++;
|
|
||||||
d = *pSrcB++;
|
out1 = mul1 - mul2;
|
||||||
|
out2 = mul3 + mul4;
|
||||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
|
||||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
/* store the real result in 3.29 format in the destination buffer. */
|
||||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
*pDst++ = out1;
|
||||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
/* store the imag result in 3.29 format in the destination buffer. */
|
||||||
|
*pDst++ = out2;
|
||||||
mul1 = (mul1 >> 1);
|
|
||||||
mul2 = (mul2 >> 1);
|
a = *pSrcA++;
|
||||||
mul3 = (mul3 >> 1);
|
b = *pSrcA++;
|
||||||
mul4 = (mul4 >> 1);
|
c = *pSrcB++;
|
||||||
|
d = *pSrcB++;
|
||||||
out1 = mul1 - mul2;
|
|
||||||
out2 = mul3 + mul4;
|
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||||
|
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||||
/* store the real result in 3.29 format in the destination buffer. */
|
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||||
*pDst++ = out1;
|
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||||
/* store the imag result in 3.29 format in the destination buffer. */
|
|
||||||
*pDst++ = out2;
|
mul1 = (mul1 >> 1);
|
||||||
|
mul2 = (mul2 >> 1);
|
||||||
a = *pSrcA++;
|
mul3 = (mul3 >> 1);
|
||||||
b = *pSrcA++;
|
mul4 = (mul4 >> 1);
|
||||||
c = *pSrcB++;
|
|
||||||
d = *pSrcB++;
|
out1 = mul1 - mul2;
|
||||||
|
out2 = mul3 + mul4;
|
||||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
|
||||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
/* store the real result in 3.29 format in the destination buffer. */
|
||||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
*pDst++ = out1;
|
||||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
/* store the imag result in 3.29 format in the destination buffer. */
|
||||||
|
*pDst++ = out2;
|
||||||
mul1 = (mul1 >> 1);
|
|
||||||
mul2 = (mul2 >> 1);
|
/* Decrement the blockSize loop counter */
|
||||||
mul3 = (mul3 >> 1);
|
blkCnt--;
|
||||||
mul4 = (mul4 >> 1);
|
}
|
||||||
|
|
||||||
out1 = mul1 - mul2;
|
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
|
||||||
out2 = mul3 + mul4;
|
** No loop unrolling is used. */
|
||||||
|
blkCnt = numSamples % 0x2U;
|
||||||
/* store the real result in 3.29 format in the destination buffer. */
|
|
||||||
*pDst++ = out1;
|
while (blkCnt > 0U)
|
||||||
/* store the imag result in 3.29 format in the destination buffer. */
|
{
|
||||||
*pDst++ = out2;
|
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
||||||
|
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
||||||
/* Decrement the blockSize loop counter */
|
a = *pSrcA++;
|
||||||
blkCnt--;
|
b = *pSrcA++;
|
||||||
}
|
c = *pSrcB++;
|
||||||
|
d = *pSrcB++;
|
||||||
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
||||||
blkCnt = numSamples % 0x2u;
|
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
||||||
|
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
||||||
while(blkCnt > 0u)
|
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[2 * i] - A[2 * i + 1] * B[2 * i + 1]. */
|
mul1 = (mul1 >> 1);
|
||||||
/* C[2 * i + 1] = A[2 * i] * B[2 * i + 1] + A[2 * i + 1] * B[2 * i]. */
|
mul2 = (mul2 >> 1);
|
||||||
a = *pSrcA++;
|
mul3 = (mul3 >> 1);
|
||||||
b = *pSrcA++;
|
mul4 = (mul4 >> 1);
|
||||||
c = *pSrcB++;
|
|
||||||
d = *pSrcB++;
|
out1 = mul1 - mul2;
|
||||||
|
out2 = mul3 + mul4;
|
||||||
mul1 = (q31_t) (((q63_t) a * c) >> 32);
|
|
||||||
mul2 = (q31_t) (((q63_t) b * d) >> 32);
|
/* store the real result in 3.29 format in the destination buffer. */
|
||||||
mul3 = (q31_t) (((q63_t) a * d) >> 32);
|
*pDst++ = out1;
|
||||||
mul4 = (q31_t) (((q63_t) b * c) >> 32);
|
/* store the imag result in 3.29 format in the destination buffer. */
|
||||||
|
*pDst++ = out2;
|
||||||
mul1 = (mul1 >> 1);
|
|
||||||
mul2 = (mul2 >> 1);
|
/* Decrement the blockSize loop counter */
|
||||||
mul3 = (mul3 >> 1);
|
blkCnt--;
|
||||||
mul4 = (mul4 >> 1);
|
}
|
||||||
|
|
||||||
out1 = mul1 - mul2;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
out2 = mul3 + mul4;
|
|
||||||
|
}
|
||||||
/* store the real result in 3.29 format in the destination buffer. */
|
|
||||||
*pDst++ = out1;
|
/**
|
||||||
/* store the imag result in 3.29 format in the destination buffer. */
|
* @} end of CmplxByCmplxMult group
|
||||||
*pDst++ = out2;
|
*/
|
||||||
|
|
||||||
/* Decrement the blockSize loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of CmplxByCmplxMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,225 +1,213 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mult_real_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point complex by real multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mult_real_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point complex by real multiplication
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup CmplxByRealMult Complex-by-Real Multiplication
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Multiplies a complex vector by a real vector and generates a complex result.
|
||||||
* -------------------------------------------------------------------- */
|
* The data in the complex arrays is stored in an interleaved fashion
|
||||||
|
* (real, imag, real, imag, ...).
|
||||||
#include "arm_math.h"
|
* The parameter <code>numSamples</code> represents the number of complex
|
||||||
|
* samples processed. The complex arrays have a total of <code>2*numSamples</code>
|
||||||
/**
|
* real values while the real array has a total of <code>numSamples</code>
|
||||||
* @ingroup groupCmplxMath
|
* real values.
|
||||||
*/
|
*
|
||||||
|
* The underlying algorithm is used:
|
||||||
/**
|
*
|
||||||
* @defgroup CmplxByRealMult Complex-by-Real Multiplication
|
* <pre>
|
||||||
*
|
* for(n=0; n<numSamples; n++) {
|
||||||
* Multiplies a complex vector by a real vector and generates a complex result.
|
* pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
|
||||||
* The data in the complex arrays is stored in an interleaved fashion
|
* pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
|
||||||
* (real, imag, real, imag, ...).
|
* }
|
||||||
* The parameter <code>numSamples</code> represents the number of complex
|
* </pre>
|
||||||
* samples processed. The complex arrays have a total of <code>2*numSamples</code>
|
*
|
||||||
* real values while the real array has a total of <code>numSamples</code>
|
* There are separate functions for floating-point, Q15, and Q31 data types.
|
||||||
* real values.
|
*/
|
||||||
*
|
|
||||||
* The underlying algorithm is used:
|
/**
|
||||||
*
|
* @addtogroup CmplxByRealMult
|
||||||
* <pre>
|
* @{
|
||||||
* for(n=0; n<numSamples; n++) {
|
*/
|
||||||
* pCmplxDst[(2*n)+0] = pSrcCmplx[(2*n)+0] * pSrcReal[n];
|
|
||||||
* pCmplxDst[(2*n)+1] = pSrcCmplx[(2*n)+1] * pSrcReal[n];
|
|
||||||
* }
|
/**
|
||||||
* </pre>
|
* @brief Floating-point complex-by-real multiplication
|
||||||
*
|
* @param[in] *pSrcCmplx points to the complex input vector
|
||||||
* There are separate functions for floating-point, Q15, and Q31 data types.
|
* @param[in] *pSrcReal points to the real input vector
|
||||||
*/
|
* @param[out] *pCmplxDst points to the complex output vector
|
||||||
|
* @param[in] numSamples number of samples in each vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup CmplxByRealMult
|
*/
|
||||||
* @{
|
|
||||||
*/
|
void arm_cmplx_mult_real_f32(
|
||||||
|
float32_t * pSrcCmplx,
|
||||||
|
float32_t * pSrcReal,
|
||||||
/**
|
float32_t * pCmplxDst,
|
||||||
* @brief Floating-point complex-by-real multiplication
|
uint32_t numSamples)
|
||||||
* @param[in] *pSrcCmplx points to the complex input vector
|
{
|
||||||
* @param[in] *pSrcReal points to the real input vector
|
float32_t in; /* Temporary variable to store input value */
|
||||||
* @param[out] *pCmplxDst points to the complex output vector
|
uint32_t blkCnt; /* loop counters */
|
||||||
* @param[in] numSamples number of samples in each vector
|
|
||||||
* @return none.
|
#if defined (ARM_MATH_DSP)
|
||||||
*/
|
|
||||||
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
void arm_cmplx_mult_real_f32(
|
float32_t inA1, inA2, inA3, inA4; /* Temporary variables to hold input data */
|
||||||
float32_t * pSrcCmplx,
|
float32_t inA5, inA6, inA7, inA8; /* Temporary variables to hold input data */
|
||||||
float32_t * pSrcReal,
|
float32_t inB1, inB2, inB3, inB4; /* Temporary variables to hold input data */
|
||||||
float32_t * pCmplxDst,
|
float32_t out1, out2, out3, out4; /* Temporary variables to hold output data */
|
||||||
uint32_t numSamples)
|
float32_t out5, out6, out7, out8; /* Temporary variables to hold output data */
|
||||||
{
|
|
||||||
float32_t in; /* Temporary variable to store input value */
|
/* loop Unrolling */
|
||||||
uint32_t blkCnt; /* loop counters */
|
blkCnt = numSamples >> 2U;
|
||||||
|
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
while (blkCnt > 0U)
|
||||||
float32_t inA1, inA2, inA3, inA4; /* Temporary variables to hold input data */
|
{
|
||||||
float32_t inA5, inA6, inA7, inA8; /* Temporary variables to hold input data */
|
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||||
float32_t inB1, inB2, inB3, inB4; /* Temporary variables to hold input data */
|
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||||
float32_t out1, out2, out3, out4; /* Temporary variables to hold output data */
|
/* read input from complex input buffer */
|
||||||
float32_t out5, out6, out7, out8; /* Temporary variables to hold output data */
|
inA1 = pSrcCmplx[0];
|
||||||
|
inA2 = pSrcCmplx[1];
|
||||||
/* loop Unrolling */
|
/* read input from real input buffer */
|
||||||
blkCnt = numSamples >> 2u;
|
inB1 = pSrcReal[0];
|
||||||
|
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
/* read input from complex input buffer */
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inA3 = pSrcCmplx[2];
|
||||||
while(blkCnt > 0u)
|
|
||||||
{
|
/* multiply complex buffer real input with real buffer input */
|
||||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
out1 = inA1 * inB1;
|
||||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
|
||||||
/* read input from complex input buffer */
|
/* read input from complex input buffer */
|
||||||
inA1 = pSrcCmplx[0];
|
inA4 = pSrcCmplx[3];
|
||||||
inA2 = pSrcCmplx[1];
|
|
||||||
/* read input from real input buffer */
|
/* multiply complex buffer imaginary input with real buffer input */
|
||||||
inB1 = pSrcReal[0];
|
out2 = inA2 * inB1;
|
||||||
|
|
||||||
/* read input from complex input buffer */
|
/* read input from real input buffer */
|
||||||
inA3 = pSrcCmplx[2];
|
inB2 = pSrcReal[1];
|
||||||
|
/* read input from complex input buffer */
|
||||||
/* multiply complex buffer real input with real buffer input */
|
inA5 = pSrcCmplx[4];
|
||||||
out1 = inA1 * inB1;
|
|
||||||
|
/* multiply complex buffer real input with real buffer input */
|
||||||
/* read input from complex input buffer */
|
out3 = inA3 * inB2;
|
||||||
inA4 = pSrcCmplx[3];
|
|
||||||
|
/* read input from complex input buffer */
|
||||||
/* multiply complex buffer imaginary input with real buffer input */
|
inA6 = pSrcCmplx[5];
|
||||||
out2 = inA2 * inB1;
|
/* read input from real input buffer */
|
||||||
|
inB3 = pSrcReal[2];
|
||||||
/* read input from real input buffer */
|
|
||||||
inB2 = pSrcReal[1];
|
/* multiply complex buffer imaginary input with real buffer input */
|
||||||
/* read input from complex input buffer */
|
out4 = inA4 * inB2;
|
||||||
inA5 = pSrcCmplx[4];
|
|
||||||
|
/* read input from complex input buffer */
|
||||||
/* multiply complex buffer real input with real buffer input */
|
inA7 = pSrcCmplx[6];
|
||||||
out3 = inA3 * inB2;
|
|
||||||
|
/* multiply complex buffer real input with real buffer input */
|
||||||
/* read input from complex input buffer */
|
out5 = inA5 * inB3;
|
||||||
inA6 = pSrcCmplx[5];
|
|
||||||
/* read input from real input buffer */
|
/* read input from complex input buffer */
|
||||||
inB3 = pSrcReal[2];
|
inA8 = pSrcCmplx[7];
|
||||||
|
|
||||||
/* multiply complex buffer imaginary input with real buffer input */
|
/* multiply complex buffer imaginary input with real buffer input */
|
||||||
out4 = inA4 * inB2;
|
out6 = inA6 * inB3;
|
||||||
|
|
||||||
/* read input from complex input buffer */
|
/* read input from real input buffer */
|
||||||
inA7 = pSrcCmplx[6];
|
inB4 = pSrcReal[3];
|
||||||
|
|
||||||
/* multiply complex buffer real input with real buffer input */
|
/* store result to destination buffer */
|
||||||
out5 = inA5 * inB3;
|
pCmplxDst[0] = out1;
|
||||||
|
|
||||||
/* read input from complex input buffer */
|
/* multiply complex buffer real input with real buffer input */
|
||||||
inA8 = pSrcCmplx[7];
|
out7 = inA7 * inB4;
|
||||||
|
|
||||||
/* multiply complex buffer imaginary input with real buffer input */
|
/* store result to destination buffer */
|
||||||
out6 = inA6 * inB3;
|
pCmplxDst[1] = out2;
|
||||||
|
|
||||||
/* read input from real input buffer */
|
/* multiply complex buffer imaginary input with real buffer input */
|
||||||
inB4 = pSrcReal[3];
|
out8 = inA8 * inB4;
|
||||||
|
|
||||||
/* store result to destination buffer */
|
/* store result to destination buffer */
|
||||||
pCmplxDst[0] = out1;
|
pCmplxDst[2] = out3;
|
||||||
|
pCmplxDst[3] = out4;
|
||||||
/* multiply complex buffer real input with real buffer input */
|
pCmplxDst[4] = out5;
|
||||||
out7 = inA7 * inB4;
|
|
||||||
|
/* increment complex input buffer by 8 to process next samples */
|
||||||
/* store result to destination buffer */
|
pSrcCmplx += 8U;
|
||||||
pCmplxDst[1] = out2;
|
|
||||||
|
/* store result to destination buffer */
|
||||||
/* multiply complex buffer imaginary input with real buffer input */
|
pCmplxDst[5] = out6;
|
||||||
out8 = inA8 * inB4;
|
|
||||||
|
/* increment real input buffer by 4 to process next samples */
|
||||||
/* store result to destination buffer */
|
pSrcReal += 4U;
|
||||||
pCmplxDst[2] = out3;
|
|
||||||
pCmplxDst[3] = out4;
|
/* store result to destination buffer */
|
||||||
pCmplxDst[4] = out5;
|
pCmplxDst[6] = out7;
|
||||||
|
pCmplxDst[7] = out8;
|
||||||
/* increment complex input buffer by 8 to process next samples */
|
|
||||||
pSrcCmplx += 8u;
|
/* increment destination buffer by 8 to process next samples */
|
||||||
|
pCmplxDst += 8U;
|
||||||
/* store result to destination buffer */
|
|
||||||
pCmplxDst[5] = out6;
|
/* Decrement the numSamples loop counter */
|
||||||
|
blkCnt--;
|
||||||
/* increment real input buffer by 4 to process next samples */
|
}
|
||||||
pSrcReal += 4u;
|
|
||||||
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* store result to destination buffer */
|
** No loop unrolling is used. */
|
||||||
pCmplxDst[6] = out7;
|
blkCnt = numSamples % 0x4U;
|
||||||
pCmplxDst[7] = out8;
|
|
||||||
|
#else
|
||||||
/* increment destination buffer by 8 to process next samples */
|
|
||||||
pCmplxDst += 8u;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
blkCnt = numSamples;
|
||||||
/* Decrement the numSamples loop counter */
|
|
||||||
blkCnt--;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
}
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
{
|
||||||
** No loop unrolling is used. */
|
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||||
blkCnt = numSamples % 0x4u;
|
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||||
|
in = *pSrcReal++;
|
||||||
#else
|
/* store the result in the destination buffer. */
|
||||||
|
*pCmplxDst++ = (*pSrcCmplx++) * (in);
|
||||||
/* Run the below code for Cortex-M0 */
|
*pCmplxDst++ = (*pSrcCmplx++) * (in);
|
||||||
blkCnt = numSamples;
|
|
||||||
|
/* Decrement the numSamples loop counter */
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
blkCnt--;
|
||||||
|
}
|
||||||
while(blkCnt > 0u)
|
}
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
/**
|
||||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
* @} end of CmplxByRealMult group
|
||||||
in = *pSrcReal++;
|
*/
|
||||||
/* store the result in the destination buffer. */
|
|
||||||
*pCmplxDst++ = (*pSrcCmplx++) * (in);
|
|
||||||
*pCmplxDst++ = (*pSrcCmplx++) * (in);
|
|
||||||
|
|
||||||
/* Decrement the numSamples loop counter */
|
|
||||||
blkCnt--;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of CmplxByRealMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,203 +1,191 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mult_real_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 complex by real multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mult_real_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 complex by real multiplication
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup CmplxByRealMult
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Q15 complex-by-real multiplication
|
||||||
/**
|
* @param[in] *pSrcCmplx points to the complex input vector
|
||||||
* @ingroup groupCmplxMath
|
* @param[in] *pSrcReal points to the real input vector
|
||||||
*/
|
* @param[out] *pCmplxDst points to the complex output vector
|
||||||
|
* @param[in] numSamples number of samples in each vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup CmplxByRealMult
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q15 complex-by-real multiplication
|
|
||||||
* @param[in] *pSrcCmplx points to the complex input vector
|
void arm_cmplx_mult_real_q15(
|
||||||
* @param[in] *pSrcReal points to the real input vector
|
q15_t * pSrcCmplx,
|
||||||
* @param[out] *pCmplxDst points to the complex output vector
|
q15_t * pSrcReal,
|
||||||
* @param[in] numSamples number of samples in each vector
|
q15_t * pCmplxDst,
|
||||||
* @return none.
|
uint32_t numSamples)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q15_t in; /* Temporary variable to store input value */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q15 range [0x8000 0x7FFF] will be saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
uint32_t blkCnt; /* loop counters */
|
||||||
void arm_cmplx_mult_real_q15(
|
q31_t inA1, inA2; /* Temporary variables to hold input data */
|
||||||
q15_t * pSrcCmplx,
|
q31_t inB1; /* Temporary variables to hold input data */
|
||||||
q15_t * pSrcReal,
|
q15_t out1, out2, out3, out4; /* Temporary variables to hold output data */
|
||||||
q15_t * pCmplxDst,
|
q31_t mul1, mul2, mul3, mul4; /* Temporary variables to hold intermediate data */
|
||||||
uint32_t numSamples)
|
|
||||||
{
|
/* loop Unrolling */
|
||||||
q15_t in; /* Temporary variable to store input value */
|
blkCnt = numSamples >> 2U;
|
||||||
|
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
while (blkCnt > 0U)
|
||||||
uint32_t blkCnt; /* loop counters */
|
{
|
||||||
q31_t inA1, inA2; /* Temporary variables to hold input data */
|
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||||
q31_t inB1; /* Temporary variables to hold input data */
|
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||||
q15_t out1, out2, out3, out4; /* Temporary variables to hold output data */
|
/* read complex number both real and imaginary from complex input buffer */
|
||||||
q31_t mul1, mul2, mul3, mul4; /* Temporary variables to hold intermediate data */
|
inA1 = *__SIMD32(pSrcCmplx)++;
|
||||||
|
/* read two real values at a time from real input buffer */
|
||||||
/* loop Unrolling */
|
inB1 = *__SIMD32(pSrcReal)++;
|
||||||
blkCnt = numSamples >> 2u;
|
/* read complex number both real and imaginary from complex input buffer */
|
||||||
|
inA2 = *__SIMD32(pSrcCmplx)++;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* multiply complex number with real numbers */
|
||||||
while(blkCnt > 0u)
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
|
||||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
|
||||||
/* read complex number both real and imaginary from complex input buffer */
|
mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
|
||||||
inA1 = *__SIMD32(pSrcCmplx)++;
|
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
|
||||||
/* read two real values at a time from real input buffer */
|
|
||||||
inB1 = *__SIMD32(pSrcReal)++;
|
#else
|
||||||
/* read complex number both real and imaginary from complex input buffer */
|
|
||||||
inA2 = *__SIMD32(pSrcCmplx)++;
|
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
||||||
|
mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
|
||||||
/* multiply complex number with real numbers */
|
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
|
||||||
|
|
||||||
mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
|
|
||||||
mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
|
/* saturate the result */
|
||||||
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
|
out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
|
||||||
|
out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
|
||||||
#else
|
out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
|
||||||
|
out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
|
||||||
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
|
||||||
mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
|
/* pack real and imaginary outputs and store them to destination */
|
||||||
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
|
*__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
|
||||||
mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
|
*__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
|
||||||
|
|
||||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
inA1 = *__SIMD32(pSrcCmplx)++;
|
||||||
|
inB1 = *__SIMD32(pSrcReal)++;
|
||||||
/* saturate the result */
|
inA2 = *__SIMD32(pSrcCmplx)++;
|
||||||
out1 = (q15_t) __SSAT(mul1 >> 15u, 16);
|
|
||||||
out2 = (q15_t) __SSAT(mul2 >> 15u, 16);
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
out3 = (q15_t) __SSAT(mul3 >> 15u, 16);
|
|
||||||
out4 = (q15_t) __SSAT(mul4 >> 15u, 16);
|
mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
|
||||||
|
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
|
||||||
/* pack real and imaginary outputs and store them to destination */
|
mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
|
||||||
*__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
|
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
|
||||||
*__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
|
|
||||||
|
#else
|
||||||
inA1 = *__SIMD32(pSrcCmplx)++;
|
|
||||||
inB1 = *__SIMD32(pSrcReal)++;
|
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
||||||
inA2 = *__SIMD32(pSrcCmplx)++;
|
mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
|
||||||
|
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
|
||||||
|
|
||||||
mul1 = (q31_t) ((q15_t) (inA1) * (q15_t) (inB1));
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1));
|
|
||||||
mul3 = (q31_t) ((q15_t) (inA2) * (q15_t) (inB1 >> 16));
|
out1 = (q15_t) __SSAT(mul1 >> 15U, 16);
|
||||||
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) (inB1 >> 16));
|
out2 = (q15_t) __SSAT(mul2 >> 15U, 16);
|
||||||
|
out3 = (q15_t) __SSAT(mul3 >> 15U, 16);
|
||||||
#else
|
out4 = (q15_t) __SSAT(mul4 >> 15U, 16);
|
||||||
|
|
||||||
mul2 = (q31_t) ((q15_t) (inA1 >> 16) * (q15_t) (inB1 >> 16));
|
*__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
|
||||||
mul1 = (q31_t) ((q15_t) inA1 * (q15_t) (inB1 >> 16));
|
*__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
|
||||||
mul4 = (q31_t) ((q15_t) (inA2 >> 16) * (q15_t) inB1);
|
|
||||||
mul3 = (q31_t) ((q15_t) inA2 * (q15_t) inB1);
|
/* Decrement the numSamples loop counter */
|
||||||
|
blkCnt--;
|
||||||
#endif // #ifndef ARM_MATH_BIG_ENDIAN
|
}
|
||||||
|
|
||||||
out1 = (q15_t) __SSAT(mul1 >> 15u, 16);
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
out2 = (q15_t) __SSAT(mul2 >> 15u, 16);
|
** No loop unrolling is used. */
|
||||||
out3 = (q15_t) __SSAT(mul3 >> 15u, 16);
|
blkCnt = numSamples % 0x4U;
|
||||||
out4 = (q15_t) __SSAT(mul4 >> 15u, 16);
|
|
||||||
|
while (blkCnt > 0U)
|
||||||
*__SIMD32(pCmplxDst)++ = __PKHBT(out1, out2, 16);
|
{
|
||||||
*__SIMD32(pCmplxDst)++ = __PKHBT(out3, out4, 16);
|
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||||
|
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||||
/* Decrement the numSamples loop counter */
|
in = *pSrcReal++;
|
||||||
blkCnt--;
|
/* store the result in the destination buffer. */
|
||||||
}
|
*pCmplxDst++ =
|
||||||
|
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
*pCmplxDst++ =
|
||||||
** No loop unrolling is used. */
|
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
||||||
blkCnt = numSamples % 0x4u;
|
|
||||||
|
/* Decrement the numSamples loop counter */
|
||||||
while(blkCnt > 0u)
|
blkCnt--;
|
||||||
{
|
}
|
||||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
|
||||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
#else
|
||||||
in = *pSrcReal++;
|
|
||||||
/* store the result in the destination buffer. */
|
/* Run the below code for Cortex-M0 */
|
||||||
*pCmplxDst++ =
|
|
||||||
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
while (numSamples > 0U)
|
||||||
*pCmplxDst++ =
|
{
|
||||||
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
/* realOut = realA * realB. */
|
||||||
|
/* imagOut = imagA * realB. */
|
||||||
/* Decrement the numSamples loop counter */
|
in = *pSrcReal++;
|
||||||
blkCnt--;
|
/* store the result in the destination buffer. */
|
||||||
}
|
*pCmplxDst++ =
|
||||||
|
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
||||||
#else
|
*pCmplxDst++ =
|
||||||
|
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the numSamples loop counter */
|
||||||
while(numSamples > 0u)
|
numSamples--;
|
||||||
{
|
}
|
||||||
/* realOut = realA * realB. */
|
|
||||||
/* imagOut = imagA * realB. */
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
in = *pSrcReal++;
|
|
||||||
/* store the result in the destination buffer. */
|
}
|
||||||
*pCmplxDst++ =
|
|
||||||
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
/**
|
||||||
*pCmplxDst++ =
|
* @} end of CmplxByRealMult group
|
||||||
(q15_t) __SSAT((((q31_t) (*pSrcCmplx++) * (in)) >> 15), 16);
|
*/
|
||||||
|
|
||||||
/* Decrement the numSamples loop counter */
|
|
||||||
numSamples--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of CmplxByRealMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,223 +1,211 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cmplx_mult_real_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 complex by real multiplication
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cmplx_mult_real_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 complex by real multiplication
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupCmplxMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup CmplxByRealMult
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
|
* @brief Q31 complex-by-real multiplication
|
||||||
/**
|
* @param[in] *pSrcCmplx points to the complex input vector
|
||||||
* @ingroup groupCmplxMath
|
* @param[in] *pSrcReal points to the real input vector
|
||||||
*/
|
* @param[out] *pCmplxDst points to the complex output vector
|
||||||
|
* @param[in] numSamples number of samples in each vector
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup CmplxByRealMult
|
*
|
||||||
* @{
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
*/
|
* \par
|
||||||
|
* The function uses saturating arithmetic.
|
||||||
|
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
||||||
/**
|
*/
|
||||||
* @brief Q31 complex-by-real multiplication
|
|
||||||
* @param[in] *pSrcCmplx points to the complex input vector
|
void arm_cmplx_mult_real_q31(
|
||||||
* @param[in] *pSrcReal points to the real input vector
|
q31_t * pSrcCmplx,
|
||||||
* @param[out] *pCmplxDst points to the complex output vector
|
q31_t * pSrcReal,
|
||||||
* @param[in] numSamples number of samples in each vector
|
q31_t * pCmplxDst,
|
||||||
* @return none.
|
uint32_t numSamples)
|
||||||
*
|
{
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q31_t inA1; /* Temporary variable to store input value */
|
||||||
* \par
|
|
||||||
* The function uses saturating arithmetic.
|
#if defined (ARM_MATH_DSP)
|
||||||
* Results outside of the allowable Q31 range[0x80000000 0x7FFFFFFF] will be saturated.
|
|
||||||
*/
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
|
uint32_t blkCnt; /* loop counters */
|
||||||
void arm_cmplx_mult_real_q31(
|
q31_t inA2, inA3, inA4; /* Temporary variables to hold input data */
|
||||||
q31_t * pSrcCmplx,
|
q31_t inB1, inB2; /* Temporary variables to hold input data */
|
||||||
q31_t * pSrcReal,
|
q31_t out1, out2, out3, out4; /* Temporary variables to hold output data */
|
||||||
q31_t * pCmplxDst,
|
|
||||||
uint32_t numSamples)
|
/* loop Unrolling */
|
||||||
{
|
blkCnt = numSamples >> 2U;
|
||||||
q31_t inA1; /* Temporary variable to store input value */
|
|
||||||
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
|
while (blkCnt > 0U)
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
{
|
||||||
uint32_t blkCnt; /* loop counters */
|
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||||
q31_t inA2, inA3, inA4; /* Temporary variables to hold input data */
|
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||||
q31_t inB1, inB2; /* Temporary variables to hold input data */
|
/* read real input from complex input buffer */
|
||||||
q31_t out1, out2, out3, out4; /* Temporary variables to hold output data */
|
inA1 = *pSrcCmplx++;
|
||||||
|
inA2 = *pSrcCmplx++;
|
||||||
/* loop Unrolling */
|
/* read input from real input buffer */
|
||||||
blkCnt = numSamples >> 2u;
|
inB1 = *pSrcReal++;
|
||||||
|
inB2 = *pSrcReal++;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
/* read imaginary input from complex input buffer */
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
inA3 = *pSrcCmplx++;
|
||||||
while(blkCnt > 0u)
|
inA4 = *pSrcCmplx++;
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
/* multiply complex input with real input */
|
||||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||||
/* read real input from complex input buffer */
|
out2 = ((q63_t) inA2 * inB1) >> 32;
|
||||||
inA1 = *pSrcCmplx++;
|
out3 = ((q63_t) inA3 * inB2) >> 32;
|
||||||
inA2 = *pSrcCmplx++;
|
out4 = ((q63_t) inA4 * inB2) >> 32;
|
||||||
/* read input from real input buffer */
|
|
||||||
inB1 = *pSrcReal++;
|
/* saturate the result */
|
||||||
inB2 = *pSrcReal++;
|
out1 = __SSAT(out1, 31);
|
||||||
/* read imaginary input from complex input buffer */
|
out2 = __SSAT(out2, 31);
|
||||||
inA3 = *pSrcCmplx++;
|
out3 = __SSAT(out3, 31);
|
||||||
inA4 = *pSrcCmplx++;
|
out4 = __SSAT(out4, 31);
|
||||||
|
|
||||||
/* multiply complex input with real input */
|
/* get result in 1.31 format */
|
||||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
out1 = out1 << 1;
|
||||||
out2 = ((q63_t) inA2 * inB1) >> 32;
|
out2 = out2 << 1;
|
||||||
out3 = ((q63_t) inA3 * inB2) >> 32;
|
out3 = out3 << 1;
|
||||||
out4 = ((q63_t) inA4 * inB2) >> 32;
|
out4 = out4 << 1;
|
||||||
|
|
||||||
/* saturate the result */
|
/* store the result to destination buffer */
|
||||||
out1 = __SSAT(out1, 31);
|
*pCmplxDst++ = out1;
|
||||||
out2 = __SSAT(out2, 31);
|
*pCmplxDst++ = out2;
|
||||||
out3 = __SSAT(out3, 31);
|
*pCmplxDst++ = out3;
|
||||||
out4 = __SSAT(out4, 31);
|
*pCmplxDst++ = out4;
|
||||||
|
|
||||||
/* get result in 1.31 format */
|
/* read real input from complex input buffer */
|
||||||
out1 = out1 << 1;
|
inA1 = *pSrcCmplx++;
|
||||||
out2 = out2 << 1;
|
inA2 = *pSrcCmplx++;
|
||||||
out3 = out3 << 1;
|
/* read input from real input buffer */
|
||||||
out4 = out4 << 1;
|
inB1 = *pSrcReal++;
|
||||||
|
inB2 = *pSrcReal++;
|
||||||
/* store the result to destination buffer */
|
/* read imaginary input from complex input buffer */
|
||||||
*pCmplxDst++ = out1;
|
inA3 = *pSrcCmplx++;
|
||||||
*pCmplxDst++ = out2;
|
inA4 = *pSrcCmplx++;
|
||||||
*pCmplxDst++ = out3;
|
|
||||||
*pCmplxDst++ = out4;
|
/* multiply complex input with real input */
|
||||||
|
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||||
/* read real input from complex input buffer */
|
out2 = ((q63_t) inA2 * inB1) >> 32;
|
||||||
inA1 = *pSrcCmplx++;
|
out3 = ((q63_t) inA3 * inB2) >> 32;
|
||||||
inA2 = *pSrcCmplx++;
|
out4 = ((q63_t) inA4 * inB2) >> 32;
|
||||||
/* read input from real input buffer */
|
|
||||||
inB1 = *pSrcReal++;
|
/* saturate the result */
|
||||||
inB2 = *pSrcReal++;
|
out1 = __SSAT(out1, 31);
|
||||||
/* read imaginary input from complex input buffer */
|
out2 = __SSAT(out2, 31);
|
||||||
inA3 = *pSrcCmplx++;
|
out3 = __SSAT(out3, 31);
|
||||||
inA4 = *pSrcCmplx++;
|
out4 = __SSAT(out4, 31);
|
||||||
|
|
||||||
/* multiply complex input with real input */
|
/* get result in 1.31 format */
|
||||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
out1 = out1 << 1;
|
||||||
out2 = ((q63_t) inA2 * inB1) >> 32;
|
out2 = out2 << 1;
|
||||||
out3 = ((q63_t) inA3 * inB2) >> 32;
|
out3 = out3 << 1;
|
||||||
out4 = ((q63_t) inA4 * inB2) >> 32;
|
out4 = out4 << 1;
|
||||||
|
|
||||||
/* saturate the result */
|
/* store the result to destination buffer */
|
||||||
out1 = __SSAT(out1, 31);
|
*pCmplxDst++ = out1;
|
||||||
out2 = __SSAT(out2, 31);
|
*pCmplxDst++ = out2;
|
||||||
out3 = __SSAT(out3, 31);
|
*pCmplxDst++ = out3;
|
||||||
out4 = __SSAT(out4, 31);
|
*pCmplxDst++ = out4;
|
||||||
|
|
||||||
/* get result in 1.31 format */
|
/* Decrement the numSamples loop counter */
|
||||||
out1 = out1 << 1;
|
blkCnt--;
|
||||||
out2 = out2 << 1;
|
}
|
||||||
out3 = out3 << 1;
|
|
||||||
out4 = out4 << 1;
|
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
/* store the result to destination buffer */
|
blkCnt = numSamples % 0x4U;
|
||||||
*pCmplxDst++ = out1;
|
|
||||||
*pCmplxDst++ = out2;
|
while (blkCnt > 0U)
|
||||||
*pCmplxDst++ = out3;
|
{
|
||||||
*pCmplxDst++ = out4;
|
/* C[2 * i] = A[2 * i] * B[i]. */
|
||||||
|
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
||||||
/* Decrement the numSamples loop counter */
|
/* read real input from complex input buffer */
|
||||||
blkCnt--;
|
inA1 = *pSrcCmplx++;
|
||||||
}
|
inA2 = *pSrcCmplx++;
|
||||||
|
/* read input from real input buffer */
|
||||||
/* If the numSamples is not a multiple of 4, compute any remaining output samples here.
|
inB1 = *pSrcReal++;
|
||||||
** No loop unrolling is used. */
|
|
||||||
blkCnt = numSamples % 0x4u;
|
/* multiply complex input with real input */
|
||||||
|
out1 = ((q63_t) inA1 * inB1) >> 32;
|
||||||
while(blkCnt > 0u)
|
out2 = ((q63_t) inA2 * inB1) >> 32;
|
||||||
{
|
|
||||||
/* C[2 * i] = A[2 * i] * B[i]. */
|
/* saturate the result */
|
||||||
/* C[2 * i + 1] = A[2 * i + 1] * B[i]. */
|
out1 = __SSAT(out1, 31);
|
||||||
/* read real input from complex input buffer */
|
out2 = __SSAT(out2, 31);
|
||||||
inA1 = *pSrcCmplx++;
|
|
||||||
inA2 = *pSrcCmplx++;
|
/* get result in 1.31 format */
|
||||||
/* read input from real input buffer */
|
out1 = out1 << 1;
|
||||||
inB1 = *pSrcReal++;
|
out2 = out2 << 1;
|
||||||
|
|
||||||
/* multiply complex input with real input */
|
/* store the result to destination buffer */
|
||||||
out1 = ((q63_t) inA1 * inB1) >> 32;
|
*pCmplxDst++ = out1;
|
||||||
out2 = ((q63_t) inA2 * inB1) >> 32;
|
*pCmplxDst++ = out2;
|
||||||
|
|
||||||
/* saturate the result */
|
/* Decrement the numSamples loop counter */
|
||||||
out1 = __SSAT(out1, 31);
|
blkCnt--;
|
||||||
out2 = __SSAT(out2, 31);
|
}
|
||||||
|
|
||||||
/* get result in 1.31 format */
|
#else
|
||||||
out1 = out1 << 1;
|
|
||||||
out2 = out2 << 1;
|
/* Run the below code for Cortex-M0 */
|
||||||
|
|
||||||
/* store the result to destination buffer */
|
while (numSamples > 0U)
|
||||||
*pCmplxDst++ = out1;
|
{
|
||||||
*pCmplxDst++ = out2;
|
/* realOut = realA * realB. */
|
||||||
|
/* imagReal = imagA * realB. */
|
||||||
/* Decrement the numSamples loop counter */
|
inA1 = *pSrcReal++;
|
||||||
blkCnt--;
|
/* store the result in the destination buffer. */
|
||||||
}
|
*pCmplxDst++ =
|
||||||
|
(q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31);
|
||||||
#else
|
*pCmplxDst++ =
|
||||||
|
(q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31);
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
/* Decrement the numSamples loop counter */
|
||||||
while(numSamples > 0u)
|
numSamples--;
|
||||||
{
|
}
|
||||||
/* realOut = realA * realB. */
|
|
||||||
/* imagReal = imagA * realB. */
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
inA1 = *pSrcReal++;
|
|
||||||
/* store the result in the destination buffer. */
|
}
|
||||||
*pCmplxDst++ =
|
|
||||||
(q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31);
|
/**
|
||||||
*pCmplxDst++ =
|
* @} end of CmplxByRealMult group
|
||||||
(q31_t) clip_q63_to_q31(((q63_t) * pSrcCmplx++ * inA1) >> 31);
|
*/
|
||||||
|
|
||||||
/* Decrement the numSamples loop counter */
|
|
||||||
numSamples--;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of CmplxByRealMult group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,87 +1,74 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_pid_init_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point PID Control initialization function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_pid_init_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point PID Control initialization function
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @addtogroup PID
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @{
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @brief Initialization function for the floating-point PID Control.
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @param[in,out] *S points to an instance of the PID structure.
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @param[in] resetStateFlag flag to reset the state. 0 = no change in state & 1 = reset the state.
|
||||||
* ------------------------------------------------------------------- */
|
* @return none.
|
||||||
|
* \par Description:
|
||||||
#include "arm_math.h"
|
* \par
|
||||||
|
* The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
|
||||||
/**
|
* The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
|
||||||
* @addtogroup PID
|
* using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
|
||||||
* @{
|
* also sets the state variables to all zeros.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/**
|
void arm_pid_init_f32(
|
||||||
* @brief Initialization function for the floating-point PID Control.
|
arm_pid_instance_f32 * S,
|
||||||
* @param[in,out] *S points to an instance of the PID structure.
|
int32_t resetStateFlag)
|
||||||
* @param[in] resetStateFlag flag to reset the state. 0 = no change in state & 1 = reset the state.
|
{
|
||||||
* @return none.
|
|
||||||
* \par Description:
|
/* Derived coefficient A0 */
|
||||||
* \par
|
S->A0 = S->Kp + S->Ki + S->Kd;
|
||||||
* The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
|
|
||||||
* The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
|
/* Derived coefficient A1 */
|
||||||
* using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
|
S->A1 = (-S->Kp) - ((float32_t) 2.0 * S->Kd);
|
||||||
* also sets the state variables to all zeros.
|
|
||||||
*/
|
/* Derived coefficient A2 */
|
||||||
|
S->A2 = S->Kd;
|
||||||
void arm_pid_init_f32(
|
|
||||||
arm_pid_instance_f32 * S,
|
/* Check whether state needs reset or not */
|
||||||
int32_t resetStateFlag)
|
if (resetStateFlag)
|
||||||
{
|
{
|
||||||
|
/* Clear the state buffer. The size will be always 3 samples */
|
||||||
/* Derived coefficient A0 */
|
memset(S->state, 0, 3U * sizeof(float32_t));
|
||||||
S->A0 = S->Kp + S->Ki + S->Kd;
|
}
|
||||||
|
|
||||||
/* Derived coefficient A1 */
|
}
|
||||||
S->A1 = (-S->Kp) - ((float32_t) 2.0 * S->Kd);
|
|
||||||
|
/**
|
||||||
/* Derived coefficient A2 */
|
* @} end of PID group
|
||||||
S->A2 = S->Kd;
|
*/
|
||||||
|
|
||||||
/* Check whether state needs reset or not */
|
|
||||||
if(resetStateFlag)
|
|
||||||
{
|
|
||||||
/* Clear the state buffer. The size will be always 3 samples */
|
|
||||||
memset(S->state, 0, 3u * sizeof(float32_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of PID group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,122 +1,110 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_pid_init_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 PID Control initialization function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_pid_init_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 PID Control initialization function
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @addtogroup PID
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @{
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @details
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @param[in,out] *S points to an instance of the Q15 PID structure.
|
||||||
* -------------------------------------------------------------------- */
|
* @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state.
|
||||||
|
* @return none.
|
||||||
#include "arm_math.h"
|
* \par Description:
|
||||||
|
* \par
|
||||||
/**
|
* The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
|
||||||
* @addtogroup PID
|
* The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
|
||||||
* @{
|
* using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
|
||||||
*/
|
* also sets the state variables to all zeros.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @details
|
void arm_pid_init_q15(
|
||||||
* @param[in,out] *S points to an instance of the Q15 PID structure.
|
arm_pid_instance_q15 * S,
|
||||||
* @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state.
|
int32_t resetStateFlag)
|
||||||
* @return none.
|
{
|
||||||
* \par Description:
|
|
||||||
* \par
|
#if defined (ARM_MATH_DSP)
|
||||||
* The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
|
|
||||||
* The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
* using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
|
|
||||||
* also sets the state variables to all zeros.
|
/* Derived coefficient A0 */
|
||||||
*/
|
S->A0 = __QADD16(__QADD16(S->Kp, S->Ki), S->Kd);
|
||||||
|
|
||||||
void arm_pid_init_q15(
|
/* Derived coefficients and pack into A1 */
|
||||||
arm_pid_instance_q15 * S,
|
|
||||||
int32_t resetStateFlag)
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
{
|
|
||||||
|
S->A1 = __PKHBT(-__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), S->Kd, 16);
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
|
||||||
|
#else
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
|
||||||
|
S->A1 = __PKHBT(S->Kd, -__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), 16);
|
||||||
/* Derived coefficient A0 */
|
|
||||||
S->A0 = __QADD16(__QADD16(S->Kp, S->Ki), S->Kd);
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
|
|
||||||
/* Derived coefficients and pack into A1 */
|
/* Check whether state needs reset or not */
|
||||||
|
if (resetStateFlag)
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
{
|
||||||
|
/* Clear the state buffer. The size will be always 3 samples */
|
||||||
S->A1 = __PKHBT(-__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), S->Kd, 16);
|
memset(S->state, 0, 3U * sizeof(q15_t));
|
||||||
|
}
|
||||||
#else
|
|
||||||
|
#else
|
||||||
S->A1 = __PKHBT(S->Kd, -__QADD16(__QADD16(S->Kd, S->Kd), S->Kp), 16);
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
|
||||||
|
q31_t temp; /*to store the sum */
|
||||||
/* Check whether state needs reset or not */
|
|
||||||
if(resetStateFlag)
|
/* Derived coefficient A0 */
|
||||||
{
|
temp = S->Kp + S->Ki + S->Kd;
|
||||||
/* Clear the state buffer. The size will be always 3 samples */
|
S->A0 = (q15_t) __SSAT(temp, 16);
|
||||||
memset(S->state, 0, 3u * sizeof(q15_t));
|
|
||||||
}
|
/* Derived coefficients and pack into A1 */
|
||||||
|
temp = -(S->Kd + S->Kd + S->Kp);
|
||||||
#else
|
S->A1 = (q15_t) __SSAT(temp, 16);
|
||||||
|
S->A2 = S->Kd;
|
||||||
/* Run the below code for Cortex-M0 */
|
|
||||||
|
|
||||||
q31_t temp; /*to store the sum */
|
|
||||||
|
/* Check whether state needs reset or not */
|
||||||
/* Derived coefficient A0 */
|
if (resetStateFlag)
|
||||||
temp = S->Kp + S->Ki + S->Kd;
|
{
|
||||||
S->A0 = (q15_t) __SSAT(temp, 16);
|
/* Clear the state buffer. The size will be always 3 samples */
|
||||||
|
memset(S->state, 0, 3U * sizeof(q15_t));
|
||||||
/* Derived coefficients and pack into A1 */
|
}
|
||||||
temp = -(S->Kd + S->Kd + S->Kp);
|
|
||||||
S->A1 = (q15_t) __SSAT(temp, 16);
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
S->A2 = S->Kd;
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
/* Check whether state needs reset or not */
|
* @} end of PID group
|
||||||
if(resetStateFlag)
|
*/
|
||||||
{
|
|
||||||
/* Clear the state buffer. The size will be always 3 samples */
|
|
||||||
memset(S->state, 0, 3u * sizeof(q15_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of PID group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,107 +1,95 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_pid_init_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 PID Control initialization function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_pid_init_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 PID Control initialization function
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @addtogroup PID
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @{
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @brief Initialization function for the Q31 PID Control.
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @param[in,out] *S points to an instance of the Q31 PID structure.
|
||||||
* ------------------------------------------------------------------- */
|
* @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state.
|
||||||
|
* @return none.
|
||||||
#include "arm_math.h"
|
* \par Description:
|
||||||
|
* \par
|
||||||
/**
|
* The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
|
||||||
* @addtogroup PID
|
* The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
|
||||||
* @{
|
* using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
|
||||||
*/
|
* also sets the state variables to all zeros.
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @brief Initialization function for the Q31 PID Control.
|
void arm_pid_init_q31(
|
||||||
* @param[in,out] *S points to an instance of the Q31 PID structure.
|
arm_pid_instance_q31 * S,
|
||||||
* @param[in] resetStateFlag flag to reset the state. 0 = no change in state 1 = reset the state.
|
int32_t resetStateFlag)
|
||||||
* @return none.
|
{
|
||||||
* \par Description:
|
|
||||||
* \par
|
#if defined (ARM_MATH_DSP)
|
||||||
* The <code>resetStateFlag</code> specifies whether to set state to zero or not. \n
|
|
||||||
* The function computes the structure fields: <code>A0</code>, <code>A1</code> <code>A2</code>
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
* using the proportional gain( \c Kp), integral gain( \c Ki) and derivative gain( \c Kd)
|
|
||||||
* also sets the state variables to all zeros.
|
/* Derived coefficient A0 */
|
||||||
*/
|
S->A0 = __QADD(__QADD(S->Kp, S->Ki), S->Kd);
|
||||||
|
|
||||||
void arm_pid_init_q31(
|
/* Derived coefficient A1 */
|
||||||
arm_pid_instance_q31 * S,
|
S->A1 = -__QADD(__QADD(S->Kd, S->Kd), S->Kp);
|
||||||
int32_t resetStateFlag)
|
|
||||||
{
|
|
||||||
|
#else
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
|
||||||
|
q31_t temp;
|
||||||
/* Derived coefficient A0 */
|
|
||||||
S->A0 = __QADD(__QADD(S->Kp, S->Ki), S->Kd);
|
/* Derived coefficient A0 */
|
||||||
|
temp = clip_q63_to_q31((q63_t) S->Kp + S->Ki);
|
||||||
/* Derived coefficient A1 */
|
S->A0 = clip_q63_to_q31((q63_t) temp + S->Kd);
|
||||||
S->A1 = -__QADD(__QADD(S->Kd, S->Kd), S->Kp);
|
|
||||||
|
/* Derived coefficient A1 */
|
||||||
|
temp = clip_q63_to_q31((q63_t) S->Kd + S->Kd);
|
||||||
#else
|
S->A1 = -clip_q63_to_q31((q63_t) temp + S->Kp);
|
||||||
|
|
||||||
/* Run the below code for Cortex-M0 */
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
q31_t temp;
|
/* Derived coefficient A2 */
|
||||||
|
S->A2 = S->Kd;
|
||||||
/* Derived coefficient A0 */
|
|
||||||
temp = clip_q63_to_q31((q63_t) S->Kp + S->Ki);
|
/* Check whether state needs reset or not */
|
||||||
S->A0 = clip_q63_to_q31((q63_t) temp + S->Kd);
|
if (resetStateFlag)
|
||||||
|
{
|
||||||
/* Derived coefficient A1 */
|
/* Clear the state buffer. The size will be always 3 samples */
|
||||||
temp = clip_q63_to_q31((q63_t) S->Kd + S->Kd);
|
memset(S->state, 0, 3U * sizeof(q31_t));
|
||||||
S->A1 = -clip_q63_to_q31((q63_t) temp + S->Kp);
|
}
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
}
|
||||||
|
|
||||||
/* Derived coefficient A2 */
|
/**
|
||||||
S->A2 = S->Kd;
|
* @} end of PID group
|
||||||
|
*/
|
||||||
/* Check whether state needs reset or not */
|
|
||||||
if(resetStateFlag)
|
|
||||||
{
|
|
||||||
/* Clear the state buffer. The size will be always 3 samples */
|
|
||||||
memset(S->state, 0, 3u * sizeof(q31_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of PID group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,65 +1,53 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_pid_reset_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point PID Control reset function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_pid_reset_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Floating-point PID Control reset function
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @addtogroup PID
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @{
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @brief Reset function for the floating-point PID Control.
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @param[in] *S Instance pointer of PID control data structure.
|
||||||
* ------------------------------------------------------------------- */
|
* @return none.
|
||||||
|
* \par Description:
|
||||||
#include "arm_math.h"
|
* The function resets the state buffer to zeros.
|
||||||
|
*/
|
||||||
/**
|
void arm_pid_reset_f32(
|
||||||
* @addtogroup PID
|
arm_pid_instance_f32 * S)
|
||||||
* @{
|
{
|
||||||
*/
|
|
||||||
|
/* Clear the state buffer. The size will be always 3 samples */
|
||||||
/**
|
memset(S->state, 0, 3U * sizeof(float32_t));
|
||||||
* @brief Reset function for the floating-point PID Control.
|
}
|
||||||
* @param[in] *S Instance pointer of PID control data structure.
|
|
||||||
* @return none.
|
/**
|
||||||
* \par Description:
|
* @} end of PID group
|
||||||
* The function resets the state buffer to zeros.
|
*/
|
||||||
*/
|
|
||||||
void arm_pid_reset_f32(
|
|
||||||
arm_pid_instance_f32 * S)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Clear the state buffer. The size will be always 3 samples */
|
|
||||||
memset(S->state, 0, 3u * sizeof(float32_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of PID group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,64 +1,52 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_pid_reset_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 PID Control reset function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_pid_reset_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 PID Control reset function
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @addtogroup PID
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @{
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @brief Reset function for the Q15 PID Control.
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @param[in] *S Instance pointer of PID control data structure.
|
||||||
* -------------------------------------------------------------------- */
|
* @return none.
|
||||||
|
* \par Description:
|
||||||
#include "arm_math.h"
|
* The function resets the state buffer to zeros.
|
||||||
|
*/
|
||||||
/**
|
void arm_pid_reset_q15(
|
||||||
* @addtogroup PID
|
arm_pid_instance_q15 * S)
|
||||||
* @{
|
{
|
||||||
*/
|
/* Reset state to zero, The size will be always 3 samples */
|
||||||
|
memset(S->state, 0, 3U * sizeof(q15_t));
|
||||||
/**
|
}
|
||||||
* @brief Reset function for the Q15 PID Control.
|
|
||||||
* @param[in] *S Instance pointer of PID control data structure.
|
/**
|
||||||
* @return none.
|
* @} end of PID group
|
||||||
* \par Description:
|
*/
|
||||||
* The function resets the state buffer to zeros.
|
|
||||||
*/
|
|
||||||
void arm_pid_reset_q15(
|
|
||||||
arm_pid_instance_q15 * S)
|
|
||||||
{
|
|
||||||
/* Reset state to zero, The size will be always 3 samples */
|
|
||||||
memset(S->state, 0, 3u * sizeof(q15_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of PID group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,65 +1,53 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_pid_reset_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 PID Control reset function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_pid_reset_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 PID Control reset function
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @addtogroup PID
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @{
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @brief Reset function for the Q31 PID Control.
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @param[in] *S Instance pointer of PID control data structure.
|
||||||
* ------------------------------------------------------------------- */
|
* @return none.
|
||||||
|
* \par Description:
|
||||||
#include "arm_math.h"
|
* The function resets the state buffer to zeros.
|
||||||
|
*/
|
||||||
/**
|
void arm_pid_reset_q31(
|
||||||
* @addtogroup PID
|
arm_pid_instance_q31 * S)
|
||||||
* @{
|
{
|
||||||
*/
|
|
||||||
|
/* Clear the state buffer. The size will be always 3 samples */
|
||||||
/**
|
memset(S->state, 0, 3U * sizeof(q31_t));
|
||||||
* @brief Reset function for the Q31 PID Control.
|
}
|
||||||
* @param[in] *S Instance pointer of PID control data structure.
|
|
||||||
* @return none.
|
/**
|
||||||
* \par Description:
|
* @} end of PID group
|
||||||
* The function resets the state buffer to zeros.
|
*/
|
||||||
*/
|
|
||||||
void arm_pid_reset_q31(
|
|
||||||
arm_pid_instance_q31 * S)
|
|
||||||
{
|
|
||||||
|
|
||||||
/* Clear the state buffer. The size will be always 3 samples */
|
|
||||||
memset(S->state, 0, 3u * sizeof(q31_t));
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of PID group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,149 +1,144 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sin_cos_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Sine and Cosine calculation for floating-point values
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sin_cos_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Sine and Cosine calculation for floating-point values.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
/**
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @ingroup groupController
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @defgroup SinCos Sine Cosine
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*
|
||||||
* -------------------------------------------------------------------- */
|
* Computes the trigonometric sine and cosine values using a combination of table lookup
|
||||||
|
* and linear interpolation.
|
||||||
#include "arm_math.h"
|
* There are separate functions for Q31 and floating-point data types.
|
||||||
#include "arm_common_tables.h"
|
* The input to the floating-point version is in degrees while the
|
||||||
|
* fixed-point Q31 have a scaled input with the range
|
||||||
/**
|
* [-1 0.9999] mapping to [-180 +180] degrees.
|
||||||
* @ingroup groupController
|
*
|
||||||
*/
|
* The floating point function also allows values that are out of the usual range. When this happens, the function will
|
||||||
|
* take extra time to adjust the input value to the range of [-180 180].
|
||||||
/**
|
*
|
||||||
* @defgroup SinCos Sine Cosine
|
* The result is accurate to 5 digits after the decimal point.
|
||||||
*
|
*
|
||||||
* Computes the trigonometric sine and cosine values using a combination of table lookup
|
* The implementation is based on table lookup using 360 values together with linear interpolation.
|
||||||
* and linear interpolation.
|
* The steps used are:
|
||||||
* There are separate functions for Q31 and floating-point data types.
|
* -# Calculation of the nearest integer table index.
|
||||||
* The input to the floating-point version is in degrees while the
|
* -# Compute the fractional portion (fract) of the input.
|
||||||
* fixed-point Q31 have a scaled input with the range
|
* -# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1.
|
||||||
* [-1 0.9999] mapping to [-180 +180] degrees.
|
* -# Sine value is computed as <code> *psinVal = y0 + (fract * (y1 - y0))</code>.
|
||||||
*
|
* -# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1.
|
||||||
* The floating point function also allows values that are out of the usual range. When this happens, the function will
|
* -# Cosine value is computed as <code> *pcosVal = y0 + (fract * (y1 - y0))</code>.
|
||||||
* take extra time to adjust the input value to the range of [-180 180].
|
*/
|
||||||
*
|
|
||||||
* The implementation is based on table lookup using 360 values together with linear interpolation.
|
/**
|
||||||
* The steps used are:
|
* @addtogroup SinCos
|
||||||
* -# Calculation of the nearest integer table index.
|
* @{
|
||||||
* -# Compute the fractional portion (fract) of the input.
|
*/
|
||||||
* -# Fetch the value corresponding to \c index from sine table to \c y0 and also value from \c index+1 to \c y1.
|
|
||||||
* -# Sine value is computed as <code> *psinVal = y0 + (fract * (y1 - y0))</code>.
|
/**
|
||||||
* -# Fetch the value corresponding to \c index from cosine table to \c y0 and also value from \c index+1 to \c y1.
|
* @brief Floating-point sin_cos function.
|
||||||
* -# Cosine value is computed as <code> *pcosVal = y0 + (fract * (y1 - y0))</code>.
|
* @param[in] theta input value in degrees
|
||||||
*/
|
* @param[out] *pSinVal points to the processed sine output.
|
||||||
|
* @param[out] *pCosVal points to the processed cos output.
|
||||||
/**
|
* @return none.
|
||||||
* @addtogroup SinCos
|
*/
|
||||||
* @{
|
|
||||||
*/
|
void arm_sin_cos_f32(
|
||||||
|
float32_t theta,
|
||||||
/**
|
float32_t * pSinVal,
|
||||||
* @brief Floating-point sin_cos function.
|
float32_t * pCosVal)
|
||||||
* @param[in] theta input value in degrees
|
{
|
||||||
* @param[out] *pSinVal points to the processed sine output.
|
float32_t fract, in; /* Temporary variables for input, output */
|
||||||
* @param[out] *pCosVal points to the processed cos output.
|
uint16_t indexS, indexC; /* Index variable */
|
||||||
* @return none.
|
float32_t f1, f2, d1, d2; /* Two nearest output values */
|
||||||
*/
|
float32_t findex, Dn, Df, temp;
|
||||||
|
|
||||||
void arm_sin_cos_f32(
|
/* input x is in degrees */
|
||||||
float32_t theta,
|
/* Scale the input, divide input by 360, for cosine add 0.25 (pi/2) to read sine table */
|
||||||
float32_t * pSinVal,
|
in = theta * 0.00277777777778f;
|
||||||
float32_t * pCosVal)
|
|
||||||
{
|
if (in < 0.0f)
|
||||||
float32_t fract, in; /* Temporary variables for input, output */
|
{
|
||||||
uint16_t indexS, indexC; /* Index variable */
|
in = -in;
|
||||||
float32_t f1, f2, d1, d2; /* Two nearest output values */
|
}
|
||||||
int32_t n;
|
|
||||||
float32_t findex, Dn, Df, temp;
|
in = in - (int32_t)in;
|
||||||
|
|
||||||
/* input x is in degrees */
|
/* Calculation of index of the table */
|
||||||
/* Scale the input, divide input by 360, for cosine add 0.25 (pi/2) to read sine table */
|
findex = (float32_t) FAST_MATH_TABLE_SIZE * in;
|
||||||
in = theta * 0.00277777777778f;
|
indexS = ((uint16_t)findex) & 0x1ff;
|
||||||
|
indexC = (indexS + (FAST_MATH_TABLE_SIZE / 4)) & 0x1ff;
|
||||||
/* Calculation of floor value of input */
|
|
||||||
n = (int32_t) in;
|
/* fractional value calculation */
|
||||||
|
fract = findex - (float32_t) indexS;
|
||||||
/* Make negative values towards -infinity */
|
|
||||||
if(in < 0.0f)
|
/* Read two nearest values of input value from the cos & sin tables */
|
||||||
{
|
f1 = sinTable_f32[indexC+0];
|
||||||
n--;
|
f2 = sinTable_f32[indexC+1];
|
||||||
}
|
d1 = -sinTable_f32[indexS+0];
|
||||||
/* Map input value to [0 1] */
|
d2 = -sinTable_f32[indexS+1];
|
||||||
in = in - (float32_t) n;
|
|
||||||
|
temp = (1.0f - fract) * f1 + fract * f2;
|
||||||
/* Calculation of index of the table */
|
|
||||||
findex = (float32_t) FAST_MATH_TABLE_SIZE * in;
|
Dn = 0.0122718463030f; // delta between the two points (fixed), in this case 2*pi/FAST_MATH_TABLE_SIZE
|
||||||
indexS = ((uint16_t)findex) & 0x1ff;
|
Df = f2 - f1; // delta between the values of the functions
|
||||||
indexC = (indexS + (FAST_MATH_TABLE_SIZE / 4)) & 0x1ff;
|
|
||||||
|
temp = Dn *(d1 + d2) - 2 * Df;
|
||||||
/* fractional value calculation */
|
temp = fract * temp + (3 * Df - (d2 + 2 * d1) * Dn);
|
||||||
fract = findex - (float32_t) indexS;
|
temp = fract * temp + d1 * Dn;
|
||||||
|
|
||||||
/* Read two nearest values of input value from the cos & sin tables */
|
/* Calculation of cosine value */
|
||||||
f1 = sinTable_f32[indexC+0];
|
*pCosVal = fract * temp + f1;
|
||||||
f2 = sinTable_f32[indexC+1];
|
|
||||||
d1 = -sinTable_f32[indexS+0];
|
/* Read two nearest values of input value from the cos & sin tables */
|
||||||
d2 = -sinTable_f32[indexS+1];
|
f1 = sinTable_f32[indexS+0];
|
||||||
|
f2 = sinTable_f32[indexS+1];
|
||||||
Dn = 0.0122718463030f; // delta between the two points (fixed), in this case 2*pi/FAST_MATH_TABLE_SIZE
|
d1 = sinTable_f32[indexC+0];
|
||||||
Df = f2 - f1; // delta between the values of the functions
|
d2 = sinTable_f32[indexC+1];
|
||||||
temp = Dn*(d1 + d2) - 2*Df;
|
|
||||||
temp = fract*temp + (3*Df - (d2 + 2*d1)*Dn);
|
temp = (1.0f - fract) * f1 + fract * f2;
|
||||||
temp = fract*temp + d1*Dn;
|
|
||||||
|
Df = f2 - f1; // delta between the values of the functions
|
||||||
/* Calculation of cosine value */
|
temp = Dn*(d1 + d2) - 2*Df;
|
||||||
*pCosVal = fract*temp + f1;
|
temp = fract*temp + (3*Df - (d2 + 2*d1)*Dn);
|
||||||
|
temp = fract*temp + d1*Dn;
|
||||||
/* Read two nearest values of input value from the cos & sin tables */
|
|
||||||
f1 = sinTable_f32[indexS+0];
|
/* Calculation of sine value */
|
||||||
f2 = sinTable_f32[indexS+1];
|
*pSinVal = fract*temp + f1;
|
||||||
d1 = sinTable_f32[indexC+0];
|
|
||||||
d2 = sinTable_f32[indexC+1];
|
if (theta < 0.0f)
|
||||||
|
{
|
||||||
Df = f2 - f1; // delta between the values of the functions
|
*pSinVal = -*pSinVal;
|
||||||
temp = Dn*(d1 + d2) - 2*Df;
|
}
|
||||||
temp = fract*temp + (3*Df - (d2 + 2*d1)*Dn);
|
}
|
||||||
temp = fract*temp + d1*Dn;
|
/**
|
||||||
|
* @} end of SinCos group
|
||||||
/* Calculation of sine value */
|
*/
|
||||||
*pSinVal = fract*temp + f1;
|
|
||||||
}
|
|
||||||
/**
|
|
||||||
* @} end of SinCos group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,122 +1,110 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sin_cos_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Cosine & Sine calculation for Q31 values
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sin_cos_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Cosine & Sine calculation for Q31 values.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
/**
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @ingroup groupController
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @addtogroup SinCos
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @{
|
||||||
* -------------------------------------------------------------------- */
|
*/
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
#include "arm_common_tables.h"
|
* @brief Q31 sin_cos function.
|
||||||
|
* @param[in] theta scaled input value in degrees
|
||||||
/**
|
* @param[out] *pSinVal points to the processed sine output.
|
||||||
* @ingroup groupController
|
* @param[out] *pCosVal points to the processed cosine output.
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* The Q31 input value is in the range [-1 0.999999] and is mapped to a degree value in the range [-180 179].
|
||||||
* @addtogroup SinCos
|
*
|
||||||
* @{
|
*/
|
||||||
*/
|
|
||||||
|
void arm_sin_cos_q31(
|
||||||
/**
|
q31_t theta,
|
||||||
* @brief Q31 sin_cos function.
|
q31_t * pSinVal,
|
||||||
* @param[in] theta scaled input value in degrees
|
q31_t * pCosVal)
|
||||||
* @param[out] *pSinVal points to the processed sine output.
|
{
|
||||||
* @param[out] *pCosVal points to the processed cosine output.
|
q31_t fract; /* Temporary variables for input, output */
|
||||||
* @return none.
|
uint16_t indexS, indexC; /* Index variable */
|
||||||
*
|
q31_t f1, f2, d1, d2; /* Two nearest output values */
|
||||||
* The Q31 input value is in the range [-1 0.999999] and is mapped to a degree value in the range [-180 179].
|
q31_t Dn, Df;
|
||||||
*
|
q63_t temp;
|
||||||
*/
|
|
||||||
|
/* Calculate the nearest index */
|
||||||
void arm_sin_cos_q31(
|
indexS = (uint32_t)theta >> CONTROLLER_Q31_SHIFT;
|
||||||
q31_t theta,
|
indexC = (indexS + 128) & 0x1ff;
|
||||||
q31_t * pSinVal,
|
|
||||||
q31_t * pCosVal)
|
/* Calculation of fractional value */
|
||||||
{
|
fract = (theta - (indexS << CONTROLLER_Q31_SHIFT)) << 8;
|
||||||
q31_t fract; /* Temporary variables for input, output */
|
|
||||||
uint16_t indexS, indexC; /* Index variable */
|
/* Read two nearest values of input value from the cos & sin tables */
|
||||||
q31_t f1, f2, d1, d2; /* Two nearest output values */
|
f1 = sinTable_q31[indexC+0];
|
||||||
q31_t Dn, Df;
|
f2 = sinTable_q31[indexC+1];
|
||||||
q63_t temp;
|
d1 = -sinTable_q31[indexS+0];
|
||||||
|
d2 = -sinTable_q31[indexS+1];
|
||||||
/* Calculate the nearest index */
|
|
||||||
indexS = (uint32_t)theta >> CONTROLLER_Q31_SHIFT;
|
Dn = 0x1921FB5; // delta between the two points (fixed), in this case 2*pi/FAST_MATH_TABLE_SIZE
|
||||||
indexC = (indexS + 128) & 0x1ff;
|
Df = f2 - f1; // delta between the values of the functions
|
||||||
|
temp = Dn*((q63_t)d1 + d2);
|
||||||
/* Calculation of fractional value */
|
temp = temp - ((q63_t)Df << 32);
|
||||||
fract = (theta - (indexS << CONTROLLER_Q31_SHIFT)) << 8;
|
temp = (q63_t)fract*(temp >> 31);
|
||||||
|
temp = temp + ((3*(q63_t)Df << 31) - (d2 + ((q63_t)d1 << 1))*Dn);
|
||||||
/* Read two nearest values of input value from the cos & sin tables */
|
temp = (q63_t)fract*(temp >> 31);
|
||||||
f1 = sinTable_q31[indexC+0];
|
temp = temp + (q63_t)d1*Dn;
|
||||||
f2 = sinTable_q31[indexC+1];
|
temp = (q63_t)fract*(temp >> 31);
|
||||||
d1 = -sinTable_q31[indexS+0];
|
|
||||||
d2 = -sinTable_q31[indexS+1];
|
/* Calculation of cosine value */
|
||||||
|
*pCosVal = clip_q63_to_q31((temp >> 31) + (q63_t)f1);
|
||||||
Dn = 0x1921FB5; // delta between the two points (fixed), in this case 2*pi/FAST_MATH_TABLE_SIZE
|
|
||||||
Df = f2 - f1; // delta between the values of the functions
|
/* Read two nearest values of input value from the cos & sin tables */
|
||||||
temp = Dn*((q63_t)d1 + d2);
|
f1 = sinTable_q31[indexS+0];
|
||||||
temp = temp - ((q63_t)Df << 32);
|
f2 = sinTable_q31[indexS+1];
|
||||||
temp = (q63_t)fract*(temp >> 31);
|
d1 = sinTable_q31[indexC+0];
|
||||||
temp = temp + ((3*(q63_t)Df << 31) - (d2 + ((q63_t)d1 << 1))*Dn);
|
d2 = sinTable_q31[indexC+1];
|
||||||
temp = (q63_t)fract*(temp >> 31);
|
|
||||||
temp = temp + (q63_t)d1*Dn;
|
Df = f2 - f1; // delta between the values of the functions
|
||||||
temp = (q63_t)fract*(temp >> 31);
|
temp = Dn*((q63_t)d1 + d2);
|
||||||
|
temp = temp - ((q63_t)Df << 32);
|
||||||
/* Calculation of cosine value */
|
temp = (q63_t)fract*(temp >> 31);
|
||||||
*pCosVal = clip_q63_to_q31((temp >> 31) + (q63_t)f1);
|
temp = temp + ((3*(q63_t)Df << 31) - (d2 + ((q63_t)d1 << 1))*Dn);
|
||||||
|
temp = (q63_t)fract*(temp >> 31);
|
||||||
/* Read two nearest values of input value from the cos & sin tables */
|
temp = temp + (q63_t)d1*Dn;
|
||||||
f1 = sinTable_q31[indexS+0];
|
temp = (q63_t)fract*(temp >> 31);
|
||||||
f2 = sinTable_q31[indexS+1];
|
|
||||||
d1 = sinTable_q31[indexC+0];
|
/* Calculation of sine value */
|
||||||
d2 = sinTable_q31[indexC+1];
|
*pSinVal = clip_q63_to_q31((temp >> 31) + (q63_t)f1);
|
||||||
|
}
|
||||||
Df = f2 - f1; // delta between the values of the functions
|
|
||||||
temp = Dn*((q63_t)d1 + d2);
|
/**
|
||||||
temp = temp - ((q63_t)Df << 32);
|
* @} end of SinCos group
|
||||||
temp = (q63_t)fract*(temp >> 31);
|
*/
|
||||||
temp = temp + ((3*(q63_t)Df << 31) - (d2 + ((q63_t)d1 << 1))*Dn);
|
|
||||||
temp = (q63_t)fract*(temp >> 31);
|
|
||||||
temp = temp + (q63_t)d1*Dn;
|
|
||||||
temp = (q63_t)fract*(temp >> 31);
|
|
||||||
|
|
||||||
/* Calculation of sine value */
|
|
||||||
*pSinVal = clip_q63_to_q31((temp >> 31) + (q63_t)f1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of SinCos group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,138 +1,115 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cos_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Fast cosine calculation for floating-point values
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cos_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Fast cosine calculation for floating-point values.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupFastMath
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @defgroup cos Cosine
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* Computes the trigonometric cosine function using a combination of table lookup
|
||||||
* -------------------------------------------------------------------- */
|
* and linear interpolation. There are separate functions for
|
||||||
|
* Q15, Q31, and floating-point data types.
|
||||||
#include "arm_math.h"
|
* The input to the floating-point version is in radians and in the range [0 2*pi) while the
|
||||||
#include "arm_common_tables.h"
|
* fixed-point Q15 and Q31 have a scaled input with the range
|
||||||
/**
|
* [0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a
|
||||||
* @ingroup groupFastMath
|
* value of 2*pi wraps around to 0.
|
||||||
*/
|
*
|
||||||
|
* The implementation is based on table lookup using 256 values together with linear interpolation.
|
||||||
/**
|
* The steps used are:
|
||||||
* @defgroup cos Cosine
|
* -# Calculation of the nearest integer table index
|
||||||
*
|
* -# Compute the fractional portion (fract) of the table index.
|
||||||
* Computes the trigonometric cosine function using a combination of table lookup
|
* -# The final result equals <code>(1.0f-fract)*a + fract*b;</code>
|
||||||
* and cubic interpolation. There are separate functions for
|
*
|
||||||
* Q15, Q31, and floating-point data types.
|
* where
|
||||||
* The input to the floating-point version is in radians while the
|
* <pre>
|
||||||
* fixed-point Q15 and Q31 have a scaled input with the range
|
* b=Table[index+0];
|
||||||
* [0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a
|
* c=Table[index+1];
|
||||||
* value of 2*pi wraps around to 0.
|
* </pre>
|
||||||
*
|
*/
|
||||||
* The implementation is based on table lookup using 256 values together with cubic interpolation.
|
|
||||||
* The steps used are:
|
/**
|
||||||
* -# Calculation of the nearest integer table index
|
* @addtogroup cos
|
||||||
* -# Fetch the four table values a, b, c, and d
|
* @{
|
||||||
* -# Compute the fractional portion (fract) of the table index.
|
*/
|
||||||
* -# Calculation of wa, wb, wc, wd
|
|
||||||
* -# The final result equals <code>a*wa + b*wb + c*wc + d*wd</code>
|
/**
|
||||||
*
|
* @brief Fast approximation to the trigonometric cosine function for floating-point data.
|
||||||
* where
|
* @param[in] x input value in radians.
|
||||||
* <pre>
|
* @return cos(x).
|
||||||
* a=Table[index-1];
|
*/
|
||||||
* b=Table[index+0];
|
|
||||||
* c=Table[index+1];
|
float32_t arm_cos_f32(
|
||||||
* d=Table[index+2];
|
float32_t x)
|
||||||
* </pre>
|
{
|
||||||
* and
|
float32_t cosVal, fract, in; /* Temporary variables for input, output */
|
||||||
* <pre>
|
uint16_t index; /* Index variable */
|
||||||
* wa=-(1/6)*fract.^3 + (1/2)*fract.^2 - (1/3)*fract;
|
float32_t a, b; /* Two nearest output values */
|
||||||
* wb=(1/2)*fract.^3 - fract.^2 - (1/2)*fract + 1;
|
int32_t n;
|
||||||
* wc=-(1/2)*fract.^3+(1/2)*fract.^2+fract;
|
float32_t findex;
|
||||||
* wd=(1/6)*fract.^3 - (1/6)*fract;
|
|
||||||
* </pre>
|
/* input x is in radians */
|
||||||
*/
|
/* Scale the input to [0 1] range from [0 2*PI] , divide input by 2*pi, add 0.25 (pi/2) to read sine table */
|
||||||
|
in = x * 0.159154943092f + 0.25f;
|
||||||
/**
|
|
||||||
* @addtogroup cos
|
/* Calculation of floor value of input */
|
||||||
* @{
|
n = (int32_t) in;
|
||||||
*/
|
|
||||||
|
/* Make negative values towards -infinity */
|
||||||
/**
|
if (in < 0.0f)
|
||||||
* @brief Fast approximation to the trigonometric cosine function for floating-point data.
|
{
|
||||||
* @param[in] x input value in radians.
|
n--;
|
||||||
* @return cos(x).
|
}
|
||||||
*/
|
|
||||||
|
/* Map input value to [0 1] */
|
||||||
float32_t arm_cos_f32(
|
in = in - (float32_t) n;
|
||||||
float32_t x)
|
|
||||||
{
|
/* Calculation of index of the table */
|
||||||
float32_t cosVal, fract, in; /* Temporary variables for input, output */
|
findex = (float32_t) FAST_MATH_TABLE_SIZE * in;
|
||||||
uint16_t index; /* Index variable */
|
index = ((uint16_t)findex) & 0x1ff;
|
||||||
float32_t a, b; /* Two nearest output values */
|
|
||||||
int32_t n;
|
/* fractional value calculation */
|
||||||
float32_t findex;
|
fract = findex - (float32_t) index;
|
||||||
|
|
||||||
/* input x is in radians */
|
/* Read two nearest values of input value from the cos table */
|
||||||
/* Scale the input to [0 1] range from [0 2*PI] , divide input by 2*pi, add 0.25 (pi/2) to read sine table */
|
a = sinTable_f32[index];
|
||||||
in = x * 0.159154943092f + 0.25f;
|
b = sinTable_f32[index+1];
|
||||||
|
|
||||||
/* Calculation of floor value of input */
|
/* Linear interpolation process */
|
||||||
n = (int32_t) in;
|
cosVal = (1.0f-fract)*a + fract*b;
|
||||||
|
|
||||||
/* Make negative values towards -infinity */
|
/* Return the output value */
|
||||||
if(in < 0.0f)
|
return (cosVal);
|
||||||
{
|
}
|
||||||
n--;
|
|
||||||
}
|
/**
|
||||||
|
* @} end of cos group
|
||||||
/* Map input value to [0 1] */
|
*/
|
||||||
in = in - (float32_t) n;
|
|
||||||
|
|
||||||
/* Calculation of index of the table */
|
|
||||||
findex = (float32_t) FAST_MATH_TABLE_SIZE * in;
|
|
||||||
index = ((uint16_t)findex) & 0x1ff;
|
|
||||||
|
|
||||||
/* fractional value calculation */
|
|
||||||
fract = findex - (float32_t) index;
|
|
||||||
|
|
||||||
/* Read two nearest values of input value from the cos table */
|
|
||||||
a = sinTable_f32[index];
|
|
||||||
b = sinTable_f32[index+1];
|
|
||||||
|
|
||||||
/* Linear interpolation process */
|
|
||||||
cosVal = (1.0f-fract)*a + fract*b;
|
|
||||||
|
|
||||||
/* Return the output value */
|
|
||||||
return (cosVal);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cos group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,96 +1,84 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cos_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Fast cosine calculation for Q15 values
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cos_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Fast cosine calculation for Q15 values.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
/**
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @ingroup groupFastMath
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @addtogroup cos
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @{
|
||||||
* -------------------------------------------------------------------- */
|
*/
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
#include "arm_common_tables.h"
|
* @brief Fast approximation to the trigonometric cosine function for Q15 data.
|
||||||
|
* @param[in] x Scaled input value in radians.
|
||||||
/**
|
* @return cos(x).
|
||||||
* @ingroup groupFastMath
|
*
|
||||||
*/
|
* The Q15 input value is in the range [0 +0.9999] and is mapped to a radian
|
||||||
|
* value in the range [0 2*pi).
|
||||||
/**
|
*/
|
||||||
* @addtogroup cos
|
|
||||||
* @{
|
q15_t arm_cos_q15(
|
||||||
*/
|
q15_t x)
|
||||||
|
{
|
||||||
/**
|
q15_t cosVal; /* Temporary variables for input, output */
|
||||||
* @brief Fast approximation to the trigonometric cosine function for Q15 data.
|
int32_t index; /* Index variables */
|
||||||
* @param[in] x Scaled input value in radians.
|
q15_t a, b; /* Two nearest output values */
|
||||||
* @return cos(x).
|
q15_t fract; /* Temporary values for fractional values */
|
||||||
*
|
|
||||||
* The Q15 input value is in the range [0 +0.9999] and is mapped to a radian
|
/* add 0.25 (pi/2) to read sine table */
|
||||||
* value in the range [0 2*pi).
|
x = (uint16_t)x + 0x2000;
|
||||||
*/
|
if (x < 0)
|
||||||
|
{ /* convert negative numbers to corresponding positive ones */
|
||||||
q15_t arm_cos_q15(
|
x = (uint16_t)x + 0x8000;
|
||||||
q15_t x)
|
}
|
||||||
{
|
|
||||||
q15_t sinVal; /* Temporary variables for input, output */
|
/* Calculate the nearest index */
|
||||||
int32_t index; /* Index variables */
|
index = (uint32_t)x >> FAST_MATH_Q15_SHIFT;
|
||||||
q15_t a, b; /* Four nearest output values */
|
|
||||||
q15_t fract; /* Temporary values for fractional values */
|
/* Calculation of fractional value */
|
||||||
|
fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9;
|
||||||
/* add 0.25 (pi/2) to read sine table */
|
|
||||||
x += 0x2000;
|
/* Read two nearest values of input value from the sin table */
|
||||||
if(x < 0)
|
a = sinTable_q15[index];
|
||||||
{ /* convert negative numbers to corresponding positive ones */
|
b = sinTable_q15[index+1];
|
||||||
x = x + 0x8000;
|
|
||||||
}
|
/* Linear interpolation process */
|
||||||
|
cosVal = (q31_t)(0x8000-fract)*a >> 16;
|
||||||
/* Calculate the nearest index */
|
cosVal = (q15_t)((((q31_t)cosVal << 16) + ((q31_t)fract*b)) >> 16);
|
||||||
index = (uint32_t)x >> FAST_MATH_Q15_SHIFT;
|
|
||||||
|
return cosVal << 1;
|
||||||
/* Calculation of fractional value */
|
}
|
||||||
fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9;
|
|
||||||
|
/**
|
||||||
/* Read two nearest values of input value from the sin table */
|
* @} end of cos group
|
||||||
a = sinTable_q15[index];
|
*/
|
||||||
b = sinTable_q15[index+1];
|
|
||||||
|
|
||||||
/* Linear interpolation process */
|
|
||||||
sinVal = (q31_t)(0x8000-fract)*a >> 16;
|
|
||||||
sinVal = (q15_t)((((q31_t)sinVal << 16) + ((q31_t)fract*b)) >> 16);
|
|
||||||
|
|
||||||
return sinVal << 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cos group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,96 +1,84 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_cos_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Fast cosine calculation for Q31 values
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_cos_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Fast cosine calculation for Q31 values.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
/**
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @ingroup groupFastMath
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @addtogroup cos
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @{
|
||||||
* -------------------------------------------------------------------- */
|
*/
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
#include "arm_common_tables.h"
|
* @brief Fast approximation to the trigonometric cosine function for Q31 data.
|
||||||
|
* @param[in] x Scaled input value in radians.
|
||||||
/**
|
* @return cos(x).
|
||||||
* @ingroup groupFastMath
|
*
|
||||||
*/
|
* The Q31 input value is in the range [0 +0.9999] and is mapped to a radian
|
||||||
|
* value in the range [0 2*pi).
|
||||||
/**
|
*/
|
||||||
* @addtogroup cos
|
|
||||||
* @{
|
q31_t arm_cos_q31(
|
||||||
*/
|
q31_t x)
|
||||||
|
{
|
||||||
/**
|
q31_t cosVal; /* Temporary variables for input, output */
|
||||||
* @brief Fast approximation to the trigonometric cosine function for Q31 data.
|
int32_t index; /* Index variables */
|
||||||
* @param[in] x Scaled input value in radians.
|
q31_t a, b; /* Two nearest output values */
|
||||||
* @return cos(x).
|
q31_t fract; /* Temporary values for fractional values */
|
||||||
*
|
|
||||||
* The Q31 input value is in the range [0 +0.9999] and is mapped to a radian
|
/* add 0.25 (pi/2) to read sine table */
|
||||||
* value in the range [0 2*pi).
|
x = (uint32_t)x + 0x20000000;
|
||||||
*/
|
if (x < 0)
|
||||||
|
{ /* convert negative numbers to corresponding positive ones */
|
||||||
q31_t arm_cos_q31(
|
x = (uint32_t)x + 0x80000000;
|
||||||
q31_t x)
|
}
|
||||||
{
|
|
||||||
q31_t cosVal; /* Temporary variables for input, output */
|
/* Calculate the nearest index */
|
||||||
int32_t index; /* Index variables */
|
index = (uint32_t)x >> FAST_MATH_Q31_SHIFT;
|
||||||
q31_t a, b; /* Four nearest output values */
|
|
||||||
q31_t fract; /* Temporary values for fractional values */
|
/* Calculation of fractional value */
|
||||||
|
fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9;
|
||||||
/* add 0.25 (pi/2) to read sine table */
|
|
||||||
x += 0x20000000;
|
/* Read two nearest values of input value from the sin table */
|
||||||
if(x < 0)
|
a = sinTable_q31[index];
|
||||||
{ /* convert negative numbers to corresponding positive ones */
|
b = sinTable_q31[index+1];
|
||||||
x = x + 0x80000000;
|
|
||||||
}
|
/* Linear interpolation process */
|
||||||
|
cosVal = (q63_t)(0x80000000-fract)*a >> 32;
|
||||||
/* Calculate the nearest index */
|
cosVal = (q31_t)((((q63_t)cosVal << 32) + ((q63_t)fract*b)) >> 32);
|
||||||
index = (uint32_t)x >> FAST_MATH_Q31_SHIFT;
|
|
||||||
|
return cosVal << 1;
|
||||||
/* Calculation of fractional value */
|
}
|
||||||
fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9;
|
|
||||||
|
/**
|
||||||
/* Read two nearest values of input value from the sin table */
|
* @} end of cos group
|
||||||
a = sinTable_q31[index];
|
*/
|
||||||
b = sinTable_q31[index+1];
|
|
||||||
|
|
||||||
/* Linear interpolation process */
|
|
||||||
cosVal = (q63_t)(0x80000000-fract)*a >> 32;
|
|
||||||
cosVal = (q31_t)((((q63_t)cosVal << 32) + ((q63_t)fract*b)) >> 32);
|
|
||||||
|
|
||||||
return cosVal << 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of cos group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,139 +1,123 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sin_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Fast sine calculation for floating-point values
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sin_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Fast sine calculation for floating-point values.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
#include <math.h>
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
/**
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
* @ingroup groupFastMath
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
*/
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
/**
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @defgroup sin Sine
|
||||||
* -------------------------------------------------------------------- */
|
*
|
||||||
|
* Computes the trigonometric sine function using a combination of table lookup
|
||||||
#include "arm_math.h"
|
* and linear interpolation. There are separate functions for
|
||||||
#include "arm_common_tables.h"
|
* Q15, Q31, and floating-point data types.
|
||||||
|
* The input to the floating-point version is in radians and in the range [0 2*pi) while the
|
||||||
/**
|
* fixed-point Q15 and Q31 have a scaled input with the range
|
||||||
* @ingroup groupFastMath
|
* [0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a
|
||||||
*/
|
* value of 2*pi wraps around to 0.
|
||||||
|
*
|
||||||
/**
|
* The implementation is based on table lookup using 512 values together with linear interpolation.
|
||||||
* @defgroup sin Sine
|
* The steps used are:
|
||||||
*
|
* -# Calculation of the nearest integer table index
|
||||||
* Computes the trigonometric sine function using a combination of table lookup
|
* -# Compute the fractional portion (fract) of the table index.
|
||||||
* and cubic interpolation. There are separate functions for
|
* -# The final result equals <code>(1.0f-fract)*a + fract*b;</code>
|
||||||
* Q15, Q31, and floating-point data types.
|
*
|
||||||
* The input to the floating-point version is in radians while the
|
* where
|
||||||
* fixed-point Q15 and Q31 have a scaled input with the range
|
* <pre>
|
||||||
* [0 +0.9999] mapping to [0 2*pi). The fixed-point range is chosen so that a
|
* a=Table[index+0];
|
||||||
* value of 2*pi wraps around to 0.
|
* b=Table[index+1];
|
||||||
*
|
* </pre>
|
||||||
* The implementation is based on table lookup using 256 values together with cubic interpolation.
|
*/
|
||||||
* The steps used are:
|
|
||||||
* -# Calculation of the nearest integer table index
|
/**
|
||||||
* -# Fetch the four table values a, b, c, and d
|
* @addtogroup sin
|
||||||
* -# Compute the fractional portion (fract) of the table index.
|
* @{
|
||||||
* -# Calculation of wa, wb, wc, wd
|
*/
|
||||||
* -# The final result equals <code>a*wa + b*wb + c*wc + d*wd</code>
|
|
||||||
*
|
/**
|
||||||
* where
|
* @brief Fast approximation to the trigonometric sine function for floating-point data.
|
||||||
* <pre>
|
* @param[in] x input value in radians.
|
||||||
* a=Table[index-1];
|
* @return sin(x).
|
||||||
* b=Table[index+0];
|
*/
|
||||||
* c=Table[index+1];
|
|
||||||
* d=Table[index+2];
|
float32_t arm_sin_f32(
|
||||||
* </pre>
|
float32_t x)
|
||||||
* and
|
{
|
||||||
* <pre>
|
float32_t sinVal, fract, in; /* Temporary variables for input, output */
|
||||||
* wa=-(1/6)*fract.^3 + (1/2)*fract.^2 - (1/3)*fract;
|
uint16_t index; /* Index variable */
|
||||||
* wb=(1/2)*fract.^3 - fract.^2 - (1/2)*fract + 1;
|
float32_t a, b; /* Two nearest output values */
|
||||||
* wc=-(1/2)*fract.^3+(1/2)*fract.^2+fract;
|
int32_t n;
|
||||||
* wd=(1/6)*fract.^3 - (1/6)*fract;
|
float32_t findex;
|
||||||
* </pre>
|
|
||||||
*/
|
/* Special case for small negative inputs */
|
||||||
|
if ((x < 0.0f) && (x >= -1.9e-7f)) {
|
||||||
/**
|
return x;
|
||||||
* @addtogroup sin
|
}
|
||||||
* @{
|
|
||||||
*/
|
/* input x is in radians */
|
||||||
|
/* Scale the input to [0 1] range from [0 2*PI] , divide input by 2*pi */
|
||||||
/**
|
in = x * 0.159154943092f;
|
||||||
* @brief Fast approximation to the trigonometric sine function for floating-point data.
|
|
||||||
* @param[in] x input value in radians.
|
/* Calculation of floor value of input */
|
||||||
* @return sin(x).
|
n = (int32_t) in;
|
||||||
*/
|
|
||||||
|
/* The cast truncates toward zero; decrement for negative inputs so n rounds toward -infinity */
|
||||||
float32_t arm_sin_f32(
|
if (x < 0.0f)
|
||||||
float32_t x)
|
{
|
||||||
{
|
n--;
|
||||||
float32_t sinVal, fract, in; /* Temporary variables for input, output */
|
}
|
||||||
uint16_t index; /* Index variable */
|
|
||||||
float32_t a, b; /* Two nearest output values */
|
/* Map input value to [0 1] */
|
||||||
int32_t n;
|
in = in - (float32_t) n;
|
||||||
float32_t findex;
|
|
||||||
|
/* Calculation of index of the table */
|
||||||
/* input x is in radians */
|
findex = (float32_t) FAST_MATH_TABLE_SIZE * in;
|
||||||
/* Scale the input to [0 1] range from [0 2*PI] , divide input by 2*pi */
|
|
||||||
in = x * 0.159154943092f;
|
index = ((uint16_t)findex) & 0x1ff;
|
||||||
|
|
||||||
/* Calculation of floor value of input */
|
/* fractional value calculation */
|
||||||
n = (int32_t) in;
|
fract = findex - (float32_t) index;
|
||||||
|
|
||||||
/* Make negative values towards -infinity */
|
/* Read two nearest values of input value from the sin table */
|
||||||
if(x < 0.0f)
|
a = sinTable_f32[index];
|
||||||
{
|
b = sinTable_f32[index+1];
|
||||||
n--;
|
|
||||||
}
|
/* Linear interpolation process */
|
||||||
|
sinVal = (1.0f-fract)*a + fract*b;
|
||||||
/* Map input value to [0 1] */
|
|
||||||
in = in - (float32_t) n;
|
/* Return the output value */
|
||||||
|
return (sinVal);
|
||||||
/* Calculation of index of the table */
|
}
|
||||||
findex = (float32_t) FAST_MATH_TABLE_SIZE * in;
|
|
||||||
index = ((uint16_t)findex) & 0x1ff;
|
/**
|
||||||
|
* @} end of sin group
|
||||||
/* fractional value calculation */
|
*/
|
||||||
fract = findex - (float32_t) index;
|
|
||||||
|
|
||||||
/* Read two nearest values of input value from the sin table */
|
|
||||||
a = sinTable_f32[index];
|
|
||||||
b = sinTable_f32[index+1];
|
|
||||||
|
|
||||||
/* Linear interpolation process */
|
|
||||||
sinVal = (1.0f-fract)*a + fract*b;
|
|
||||||
|
|
||||||
/* Return the output value */
|
|
||||||
return (sinVal);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of sin group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,88 +1,76 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sin_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Fast sine calculation for Q15 values
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sin_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Fast sine calculation for Q15 values.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
/**
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @ingroup groupFastMath
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @addtogroup sin
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @{
|
||||||
* -------------------------------------------------------------------- */
|
*/
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
#include "arm_common_tables.h"
|
* @brief Fast approximation to the trigonometric sine function for Q15 data.
|
||||||
|
* @param[in] x Scaled input value in radians.
|
||||||
/**
|
* @return sin(x).
|
||||||
* @ingroup groupFastMath
|
*
|
||||||
*/
|
* The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*pi).
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @addtogroup sin
|
q15_t arm_sin_q15(
|
||||||
* @{
|
q15_t x)
|
||||||
*/
|
{
|
||||||
|
q15_t sinVal; /* Temporary variables for input, output */
|
||||||
/**
|
int32_t index; /* Index variables */
|
||||||
* @brief Fast approximation to the trigonometric sine function for Q15 data.
|
q15_t a, b; /* Two nearest output values */
|
||||||
* @param[in] x Scaled input value in radians.
|
q15_t fract; /* Temporary values for fractional values */
|
||||||
* @return sin(x).
|
|
||||||
*
|
/* Calculate the nearest index */
|
||||||
* The Q15 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*pi).
|
index = (uint32_t)x >> FAST_MATH_Q15_SHIFT;
|
||||||
*/
|
|
||||||
|
/* Calculation of fractional value */
|
||||||
q15_t arm_sin_q15(
|
fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9;
|
||||||
q15_t x)
|
|
||||||
{
|
/* Read two nearest values of input value from the sin table */
|
||||||
q15_t sinVal; /* Temporary variables for input, output */
|
a = sinTable_q15[index];
|
||||||
int32_t index; /* Index variables */
|
b = sinTable_q15[index+1];
|
||||||
q15_t a, b; /* Four nearest output values */
|
|
||||||
q15_t fract; /* Temporary values for fractional values */
|
/* Linear interpolation process */
|
||||||
|
sinVal = (q31_t)(0x8000-fract)*a >> 16;
|
||||||
/* Calculate the nearest index */
|
sinVal = (q15_t)((((q31_t)sinVal << 16) + ((q31_t)fract*b)) >> 16);
|
||||||
index = (uint32_t)x >> FAST_MATH_Q15_SHIFT;
|
|
||||||
|
return sinVal << 1;
|
||||||
/* Calculation of fractional value */
|
}
|
||||||
fract = (x - (index << FAST_MATH_Q15_SHIFT)) << 9;
|
|
||||||
|
/**
|
||||||
/* Read two nearest values of input value from the sin table */
|
* @} end of sin group
|
||||||
a = sinTable_q15[index];
|
*/
|
||||||
b = sinTable_q15[index+1];
|
|
||||||
|
|
||||||
/* Linear interpolation process */
|
|
||||||
sinVal = (q31_t)(0x8000-fract)*a >> 16;
|
|
||||||
sinVal = (q15_t)((((q31_t)sinVal << 16) + ((q31_t)fract*b)) >> 16);
|
|
||||||
|
|
||||||
return sinVal << 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of sin group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,87 +1,75 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sin_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Fast sine calculation for Q31 values
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sin_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Fast sine calculation for Q31 values.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
/**
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @ingroup groupFastMath
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @addtogroup sin
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @{
|
||||||
* -------------------------------------------------------------------- */
|
*/
|
||||||
|
|
||||||
#include "arm_math.h"
|
/**
|
||||||
#include "arm_common_tables.h"
|
* @brief Fast approximation to the trigonometric sine function for Q31 data.
|
||||||
|
* @param[in] x Scaled input value in radians.
|
||||||
/**
|
* @return sin(x).
|
||||||
* @ingroup groupFastMath
|
*
|
||||||
*/
|
* The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*pi). */
|
||||||
|
|
||||||
/**
|
q31_t arm_sin_q31(
|
||||||
* @addtogroup sin
|
q31_t x)
|
||||||
* @{
|
{
|
||||||
*/
|
q31_t sinVal; /* Temporary variables for input, output */
|
||||||
|
int32_t index; /* Index variables */
|
||||||
/**
|
q31_t a, b; /* Four nearest output values */
|
||||||
* @brief Fast approximation to the trigonometric sine function for Q31 data.
|
q31_t fract; /* Temporary values for fractional values */
|
||||||
* @param[in] x Scaled input value in radians.
|
|
||||||
* @return sin(x).
|
/* Calculate the nearest index */
|
||||||
*
|
index = (uint32_t)x >> FAST_MATH_Q31_SHIFT;
|
||||||
* The Q31 input value is in the range [0 +0.9999] and is mapped to a radian value in the range [0 2*pi). */
|
|
||||||
|
/* Calculation of fractional value */
|
||||||
q31_t arm_sin_q31(
|
fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9;
|
||||||
q31_t x)
|
|
||||||
{
|
/* Read two nearest values of input value from the sin table */
|
||||||
q31_t sinVal; /* Temporary variables for input, output */
|
a = sinTable_q31[index];
|
||||||
int32_t index; /* Index variables */
|
b = sinTable_q31[index+1];
|
||||||
q31_t a, b; /* Four nearest output values */
|
|
||||||
q31_t fract; /* Temporary values for fractional values */
|
/* Linear interpolation process */
|
||||||
|
sinVal = (q63_t)(0x80000000-fract)*a >> 32;
|
||||||
/* Calculate the nearest index */
|
sinVal = (q31_t)((((q63_t)sinVal << 32) + ((q63_t)fract*b)) >> 32);
|
||||||
index = (uint32_t)x >> FAST_MATH_Q31_SHIFT;
|
|
||||||
|
return sinVal << 1;
|
||||||
/* Calculation of fractional value */
|
}
|
||||||
fract = (x - (index << FAST_MATH_Q31_SHIFT)) << 9;
|
|
||||||
|
/**
|
||||||
/* Read two nearest values of input value from the sin table */
|
* @} end of sin group
|
||||||
a = sinTable_q31[index];
|
*/
|
||||||
b = sinTable_q31[index+1];
|
|
||||||
|
|
||||||
/* Linear interpolation process */
|
|
||||||
sinVal = (q63_t)(0x80000000-fract)*a >> 32;
|
|
||||||
sinVal = (q31_t)((((q63_t)sinVal << 32) + ((q63_t)fract*b)) >> 32);
|
|
||||||
|
|
||||||
return sinVal << 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of sin group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,155 +1,144 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sqrt_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 square root function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sqrt_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 square root function.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
/**
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
* @ingroup groupFastMath
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
*/
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
/**
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @addtogroup SQRT
|
||||||
* -------------------------------------------------------------------- */
|
* @{
|
||||||
#include "arm_math.h"
|
*/
|
||||||
#include "arm_common_tables.h"
|
|
||||||
|
/**
|
||||||
|
* @brief Q15 square root function.
|
||||||
/**
|
* @param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
|
||||||
* @ingroup groupFastMath
|
* @param[out] *pOut square root of input value.
|
||||||
*/
|
* @return The function returns ARM_MATH_SUCCESS if the input value is positive
|
||||||
|
* and ARM_MATH_ARGUMENT_ERROR if the input is negative. For
|
||||||
/**
|
* negative inputs, the function returns *pOut = 0.
|
||||||
* @addtogroup SQRT
|
*/
|
||||||
* @{
|
|
||||||
*/
|
arm_status arm_sqrt_q15(
|
||||||
|
q15_t in,
|
||||||
/**
|
q15_t * pOut)
|
||||||
* @brief Q15 square root function.
|
{
|
||||||
* @param[in] in input value. The range of the input value is [0 +1) or 0x0000 to 0x7FFF.
|
q15_t number, temp1, var1, signBits1, half;
|
||||||
* @param[out] *pOut square root of input value.
|
q31_t bits_val1;
|
||||||
* @return The function returns ARM_MATH_SUCCESS if the input value is positive
|
float32_t temp_float1;
|
||||||
* and ARM_MATH_ARGUMENT_ERROR if the input is negative. For
|
union
|
||||||
* negative inputs, the function returns *pOut = 0.
|
{
|
||||||
*/
|
q31_t fracval;
|
||||||
|
float32_t floatval;
|
||||||
arm_status arm_sqrt_q15(
|
} tempconv;
|
||||||
q15_t in,
|
|
||||||
q15_t * pOut)
|
number = in;
|
||||||
{
|
|
||||||
q15_t number, temp1, var1, signBits1, half;
|
/* If the input is a positive number then compute the signBits. */
|
||||||
q31_t bits_val1;
|
if (number > 0)
|
||||||
float32_t temp_float1;
|
{
|
||||||
union
|
signBits1 = __CLZ(number) - 17;
|
||||||
{
|
|
||||||
q31_t fracval;
|
/* Shift by the number of signBits1 */
|
||||||
float32_t floatval;
|
if ((signBits1 % 2) == 0)
|
||||||
} tempconv;
|
{
|
||||||
|
number = number << signBits1;
|
||||||
number = in;
|
}
|
||||||
|
else
|
||||||
/* If the input is a positive number then compute the signBits. */
|
{
|
||||||
if(number > 0)
|
number = number << (signBits1 - 1);
|
||||||
{
|
}
|
||||||
signBits1 = __CLZ(number) - 17;
|
|
||||||
|
/* Calculate half value of the number */
|
||||||
/* Shift by the number of signBits1 */
|
half = number >> 1;
|
||||||
if((signBits1 % 2) == 0)
|
/* Store the number for later use */
|
||||||
{
|
temp1 = number;
|
||||||
number = number << signBits1;
|
|
||||||
}
|
/* Convert to float */
|
||||||
else
|
temp_float1 = number * 3.051757812500000e-005f;
|
||||||
{
|
/*Store as integer */
|
||||||
number = number << (signBits1 - 1);
|
tempconv.floatval = temp_float1;
|
||||||
}
|
bits_val1 = tempconv.fracval;
|
||||||
|
/* Subtract the shifted value from the magic number to give intial guess */
|
||||||
/* Calculate half value of the number */
|
bits_val1 = 0x5f3759df - (bits_val1 >> 1); /* gives initial guess */
|
||||||
half = number >> 1;
|
/* Store as float */
|
||||||
/* Store the number for later use */
|
tempconv.fracval = bits_val1;
|
||||||
temp1 = number;
|
temp_float1 = tempconv.floatval;
|
||||||
|
/* Convert to integer format */
|
||||||
/*Convert to float */
|
var1 = (q31_t) (temp_float1 * 16384);
|
||||||
temp_float1 = number * 3.051757812500000e-005f;
|
|
||||||
/*Store as integer */
|
/* 1st iteration */
|
||||||
tempconv.floatval = temp_float1;
|
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
|
||||||
bits_val1 = tempconv.fracval;
|
((q15_t)
|
||||||
/* Subtract the shifted value from the magic number to give intial guess */
|
((((q15_t)
|
||||||
bits_val1 = 0x5f3759df - (bits_val1 >> 1); // gives initial guess
|
(((q31_t) var1 * var1) >> 15)) *
|
||||||
/* Store as float */
|
(q31_t) half) >> 15))) >> 15)) << 2;
|
||||||
tempconv.fracval = bits_val1;
|
/* 2nd iteration */
|
||||||
temp_float1 = tempconv.floatval;
|
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
|
||||||
/* Convert to integer format */
|
((q15_t)
|
||||||
var1 = (q31_t) (temp_float1 * 16384);
|
((((q15_t)
|
||||||
|
(((q31_t) var1 * var1) >> 15)) *
|
||||||
/* 1st iteration */
|
(q31_t) half) >> 15))) >> 15)) << 2;
|
||||||
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
|
/* 3rd iteration */
|
||||||
((q15_t)
|
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
|
||||||
((((q15_t)
|
((q15_t)
|
||||||
(((q31_t) var1 * var1) >> 15)) *
|
((((q15_t)
|
||||||
(q31_t) half) >> 15))) >> 15)) << 2;
|
(((q31_t) var1 * var1) >> 15)) *
|
||||||
/* 2nd iteration */
|
(q31_t) half) >> 15))) >> 15)) << 2;
|
||||||
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
|
|
||||||
((q15_t)
|
/* Multiply the inverse square root with the original value */
|
||||||
((((q15_t)
|
var1 = ((q15_t) (((q31_t) temp1 * var1) >> 15)) << 1;
|
||||||
(((q31_t) var1 * var1) >> 15)) *
|
|
||||||
(q31_t) half) >> 15))) >> 15)) << 2;
|
/* Shift the output down accordingly */
|
||||||
/* 3rd iteration */
|
if ((signBits1 % 2) == 0)
|
||||||
var1 = ((q15_t) ((q31_t) var1 * (0x3000 -
|
{
|
||||||
((q15_t)
|
var1 = var1 >> (signBits1 / 2);
|
||||||
((((q15_t)
|
}
|
||||||
(((q31_t) var1 * var1) >> 15)) *
|
else
|
||||||
(q31_t) half) >> 15))) >> 15)) << 2;
|
{
|
||||||
|
var1 = var1 >> ((signBits1 - 1) / 2);
|
||||||
/* Multiply the inverse square root with the original value */
|
}
|
||||||
var1 = ((q15_t) (((q31_t) temp1 * var1) >> 15)) << 1;
|
*pOut = var1;
|
||||||
|
|
||||||
/* Shift the output down accordingly */
|
return (ARM_MATH_SUCCESS);
|
||||||
if((signBits1 % 2) == 0)
|
}
|
||||||
{
|
/* If the number is a negative number then store zero as its square root value */
|
||||||
var1 = var1 >> (signBits1 / 2);
|
else
|
||||||
}
|
{
|
||||||
else
|
*pOut = 0;
|
||||||
{
|
return (ARM_MATH_ARGUMENT_ERROR);
|
||||||
var1 = var1 >> ((signBits1 - 1) / 2);
|
}
|
||||||
}
|
}
|
||||||
*pOut = var1;
|
|
||||||
|
/**
|
||||||
return (ARM_MATH_SUCCESS);
|
* @} end of SQRT group
|
||||||
}
|
*/
|
||||||
/* If the number is a negative number then store zero as its square root value */
|
|
||||||
else
|
|
||||||
{
|
|
||||||
*pOut = 0;
|
|
||||||
return (ARM_MATH_ARGUMENT_ERROR);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of SQRT group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,153 +1,142 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_sqrt_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 square root function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_sqrt_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 square root function.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
#include "arm_common_tables.h"
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
/**
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
* @ingroup groupFastMath
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
*/
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
/**
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @addtogroup SQRT
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* @{
|
||||||
* -------------------------------------------------------------------- */
|
*/
|
||||||
#include "arm_math.h"
|
|
||||||
#include "arm_common_tables.h"
|
/**
|
||||||
|
* @brief Q31 square root function.
|
||||||
/**
|
* @param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
|
||||||
* @ingroup groupFastMath
|
* @param[out] *pOut square root of input value.
|
||||||
*/
|
* @return The function returns ARM_MATH_SUCCESS if the input value is positive
|
||||||
|
* and ARM_MATH_ARGUMENT_ERROR if the input is negative. For
|
||||||
/**
|
* negative inputs, the function returns *pOut = 0.
|
||||||
* @addtogroup SQRT
|
*/
|
||||||
* @{
|
|
||||||
*/
|
arm_status arm_sqrt_q31(
|
||||||
|
q31_t in,
|
||||||
/**
|
q31_t * pOut)
|
||||||
* @brief Q31 square root function.
|
{
|
||||||
* @param[in] in input value. The range of the input value is [0 +1) or 0x00000000 to 0x7FFFFFFF.
|
q31_t number, temp1, bits_val1, var1, signBits1, half;
|
||||||
* @param[out] *pOut square root of input value.
|
float32_t temp_float1;
|
||||||
* @return The function returns ARM_MATH_SUCCESS if the input value is positive
|
union
|
||||||
* and ARM_MATH_ARGUMENT_ERROR if the input is negative. For
|
{
|
||||||
* negative inputs, the function returns *pOut = 0.
|
q31_t fracval;
|
||||||
*/
|
float32_t floatval;
|
||||||
|
} tempconv;
|
||||||
arm_status arm_sqrt_q31(
|
|
||||||
q31_t in,
|
number = in;
|
||||||
q31_t * pOut)
|
|
||||||
{
|
/* If the input is a positive number then compute the signBits. */
|
||||||
q31_t number, temp1, bits_val1, var1, signBits1, half;
|
if (number > 0)
|
||||||
float32_t temp_float1;
|
{
|
||||||
union
|
signBits1 = __CLZ(number) - 1;
|
||||||
{
|
|
||||||
q31_t fracval;
|
/* Shift by the number of signBits1 */
|
||||||
float32_t floatval;
|
if ((signBits1 % 2) == 0)
|
||||||
} tempconv;
|
{
|
||||||
|
number = number << signBits1;
|
||||||
number = in;
|
}
|
||||||
|
else
|
||||||
/* If the input is a positive number then compute the signBits. */
|
{
|
||||||
if(number > 0)
|
number = number << (signBits1 - 1);
|
||||||
{
|
}
|
||||||
signBits1 = __CLZ(number) - 1;
|
|
||||||
|
/* Calculate half value of the number */
|
||||||
/* Shift by the number of signBits1 */
|
half = number >> 1;
|
||||||
if((signBits1 % 2) == 0)
|
/* Store the number for later use */
|
||||||
{
|
temp1 = number;
|
||||||
number = number << signBits1;
|
|
||||||
}
|
/*Convert to float */
|
||||||
else
|
temp_float1 = number * 4.6566128731e-010f;
|
||||||
{
|
/*Store as integer */
|
||||||
number = number << (signBits1 - 1);
|
tempconv.floatval = temp_float1;
|
||||||
}
|
bits_val1 = tempconv.fracval;
|
||||||
|
/* Subtract the shifted value from the magic number to give intial guess */
|
||||||
/* Calculate half value of the number */
|
bits_val1 = 0x5f3759df - (bits_val1 >> 1); /* gives initial guess */
|
||||||
half = number >> 1;
|
/* Store as float */
|
||||||
/* Store the number for later use */
|
tempconv.fracval = bits_val1;
|
||||||
temp1 = number;
|
temp_float1 = tempconv.floatval;
|
||||||
|
/* Convert to integer format */
|
||||||
/*Convert to float */
|
var1 = (q31_t) (temp_float1 * 1073741824);
|
||||||
temp_float1 = number * 4.6566128731e-010f;
|
|
||||||
/*Store as integer */
|
/* 1st iteration */
|
||||||
tempconv.floatval = temp_float1;
|
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
|
||||||
bits_val1 = tempconv.fracval;
|
((q31_t)
|
||||||
/* Subtract the shifted value from the magic number to give intial guess */
|
((((q31_t)
|
||||||
bits_val1 = 0x5f3759df - (bits_val1 >> 1); // gives initial guess
|
(((q63_t) var1 * var1) >> 31)) *
|
||||||
/* Store as float */
|
(q63_t) half) >> 31))) >> 31)) << 2;
|
||||||
tempconv.fracval = bits_val1;
|
/* 2nd iteration */
|
||||||
temp_float1 = tempconv.floatval;
|
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
|
||||||
/* Convert to integer format */
|
((q31_t)
|
||||||
var1 = (q31_t) (temp_float1 * 1073741824);
|
((((q31_t)
|
||||||
|
(((q63_t) var1 * var1) >> 31)) *
|
||||||
/* 1st iteration */
|
(q63_t) half) >> 31))) >> 31)) << 2;
|
||||||
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
|
/* 3rd iteration */
|
||||||
((q31_t)
|
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
|
||||||
((((q31_t)
|
((q31_t)
|
||||||
(((q63_t) var1 * var1) >> 31)) *
|
((((q31_t)
|
||||||
(q63_t) half) >> 31))) >> 31)) << 2;
|
(((q63_t) var1 * var1) >> 31)) *
|
||||||
/* 2nd iteration */
|
(q63_t) half) >> 31))) >> 31)) << 2;
|
||||||
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
|
|
||||||
((q31_t)
|
/* Multiply the inverse square root with the original value */
|
||||||
((((q31_t)
|
var1 = ((q31_t) (((q63_t) temp1 * var1) >> 31)) << 1;
|
||||||
(((q63_t) var1 * var1) >> 31)) *
|
|
||||||
(q63_t) half) >> 31))) >> 31)) << 2;
|
/* Shift the output down accordingly */
|
||||||
/* 3rd iteration */
|
if ((signBits1 % 2) == 0)
|
||||||
var1 = ((q31_t) ((q63_t) var1 * (0x30000000 -
|
{
|
||||||
((q31_t)
|
var1 = var1 >> (signBits1 / 2);
|
||||||
((((q31_t)
|
}
|
||||||
(((q63_t) var1 * var1) >> 31)) *
|
else
|
||||||
(q63_t) half) >> 31))) >> 31)) << 2;
|
{
|
||||||
|
var1 = var1 >> ((signBits1 - 1) / 2);
|
||||||
/* Multiply the inverse square root with the original value */
|
}
|
||||||
var1 = ((q31_t) (((q63_t) temp1 * var1) >> 31)) << 1;
|
*pOut = var1;
|
||||||
|
|
||||||
/* Shift the output down accordingly */
|
return (ARM_MATH_SUCCESS);
|
||||||
if((signBits1 % 2) == 0)
|
}
|
||||||
{
|
/* If the number is a negative number then store zero as its square root value */
|
||||||
var1 = var1 >> (signBits1 / 2);
|
else
|
||||||
}
|
{
|
||||||
else
|
*pOut = 0;
|
||||||
{
|
return (ARM_MATH_ARGUMENT_ERROR);
|
||||||
var1 = var1 >> ((signBits1 - 1) / 2);
|
}
|
||||||
}
|
}
|
||||||
*pOut = var1;
|
|
||||||
|
/**
|
||||||
return (ARM_MATH_SUCCESS);
|
* @} end of SQRT group
|
||||||
}
|
*/
|
||||||
/* If the number is a negative number then store zero as its square root value */
|
|
||||||
else
|
|
||||||
{
|
|
||||||
*pOut = 0;
|
|
||||||
return (ARM_MATH_ARGUMENT_ERROR);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of SQRT group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,110 +1,98 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df1_32x64_init_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: High precision Q31 Biquad cascade filter initialization function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df1_32x64_init_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: High precision Q31 Biquad cascade filter initialization function.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupFilters
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BiquadCascadeDF1_32x64
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @details
|
||||||
|
*
|
||||||
/**
|
* @param[in,out] *S points to an instance of the high precision Q31 Biquad cascade filter structure.
|
||||||
* @ingroup groupFilters
|
* @param[in] numStages number of 2nd order stages in the filter.
|
||||||
*/
|
* @param[in] *pCoeffs points to the filter coefficients.
|
||||||
|
* @param[in] *pState points to the state buffer.
|
||||||
/**
|
* @param[in] postShift Shift to be applied after the accumulator. Varies according to the coefficients format.
|
||||||
* @addtogroup BiquadCascadeDF1_32x64
|
* @return none
|
||||||
* @{
|
*
|
||||||
*/
|
* <b>Coefficient and State Ordering:</b>
|
||||||
|
*
|
||||||
/**
|
* \par
|
||||||
* @details
|
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||||
*
|
* <pre>
|
||||||
* @param[in,out] *S points to an instance of the high precision Q31 Biquad cascade filter structure.
|
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||||
* @param[in] numStages number of 2nd order stages in the filter.
|
* </pre>
|
||||||
* @param[in] *pCoeffs points to the filter coefficients.
|
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||||
* @param[in] *pState points to the state buffer.
|
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||||
* @param[in] postShift Shift to be applied after the accumulator. Varies according to the coefficients format.
|
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||||
* @return none
|
*
|
||||||
*
|
* \par
|
||||||
* <b>Coefficient and State Ordering:</b>
|
* The <code>pState</code> points to state variables array and size of each state variable is 1.63 format.
|
||||||
*
|
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||||
* \par
|
* The state variables are arranged in the state array as:
|
||||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
* <pre>
|
||||||
* <pre>
|
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
* </pre>
|
||||||
* </pre>
|
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
* The state array has a total length of <code>4*numStages</code> values.
|
||||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
*/
|
||||||
*
|
|
||||||
* \par
|
void arm_biquad_cas_df1_32x64_init_q31(
|
||||||
* The <code>pState</code> points to state variables array and size of each state variable is 1.63 format.
|
arm_biquad_cas_df1_32x64_ins_q31 * S,
|
||||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
uint8_t numStages,
|
||||||
* The state variables are arranged in the state array as:
|
q31_t * pCoeffs,
|
||||||
* <pre>
|
q63_t * pState,
|
||||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
uint8_t postShift)
|
||||||
* </pre>
|
{
|
||||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
/* Assign filter stages */
|
||||||
* The state array has a total length of <code>4*numStages</code> values.
|
S->numStages = numStages;
|
||||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
|
||||||
*/
|
/* Assign postShift to be applied to the output */
|
||||||
|
S->postShift = postShift;
|
||||||
void arm_biquad_cas_df1_32x64_init_q31(
|
|
||||||
arm_biquad_cas_df1_32x64_ins_q31 * S,
|
/* Assign coefficient pointer */
|
||||||
uint8_t numStages,
|
S->pCoeffs = pCoeffs;
|
||||||
q31_t * pCoeffs,
|
|
||||||
q63_t * pState,
|
/* Clear state buffer and size is always 4 * numStages */
|
||||||
uint8_t postShift)
|
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q63_t));
|
||||||
{
|
|
||||||
/* Assign filter stages */
|
/* Assign state pointer */
|
||||||
S->numStages = numStages;
|
S->pState = pState;
|
||||||
|
}
|
||||||
/* Assign postShift to be applied to the output */
|
|
||||||
S->postShift = postShift;
|
/**
|
||||||
|
* @} end of BiquadCascadeDF1_32x64 group
|
||||||
/* Assign coefficient pointer */
|
*/
|
||||||
S->pCoeffs = pCoeffs;
|
|
||||||
|
|
||||||
/* Clear state buffer and size is always 4 * numStages */
|
|
||||||
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q63_t));
|
|
||||||
|
|
||||||
/* Assign state pointer */
|
|
||||||
S->pState = pState;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF1_32x64 group
|
|
||||||
*/
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,425 +1,412 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df1_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Processing function for the floating-point Biquad cascade DirectFormI(DF1) filter
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df1_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Processing function for the
|
* -------------------------------------------------------------------- */
|
||||||
* floating-point Biquad cascade DirectFormI(DF1) filter.
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @ingroup groupFilters
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
*/
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @defgroup BiquadCascadeDF1 Biquad Cascade IIR Filters Using Direct Form I Structure
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
*
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* This set of functions implements arbitrary order recursive (IIR) filters.
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
* The filters are implemented as a cascade of second order Biquad sections.
|
||||||
* -------------------------------------------------------------------- */
|
* The functions support Q15, Q31 and floating-point data types.
|
||||||
|
* Fast versions of the Q15 and Q31 functions are also supported on Cortex-M4 and Cortex-M3.
|
||||||
#include "arm_math.h"
|
*
|
||||||
|
* The functions operate on blocks of input and output data and each call to the function
|
||||||
/**
|
* processes <code>blockSize</code> samples through the filter.
|
||||||
* @ingroup groupFilters
|
* <code>pSrc</code> points to the array of input data and
|
||||||
*/
|
* <code>pDst</code> points to the array of output data.
|
||||||
|
* Both arrays contain <code>blockSize</code> values.
|
||||||
/**
|
*
|
||||||
* @defgroup BiquadCascadeDF1 Biquad Cascade IIR Filters Using Direct Form I Structure
|
* \par Algorithm
|
||||||
*
|
* Each Biquad stage implements a second order filter using the difference equation:
|
||||||
* This set of functions implements arbitrary order recursive (IIR) filters.
|
* <pre>
|
||||||
* The filters are implemented as a cascade of second order Biquad sections.
|
* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
* The functions support Q15, Q31 and floating-point data types.
|
* </pre>
|
||||||
* Fast versions of the Q15 and Q31 functions are also supported on Cortex-M4 and Cortex-M3.
|
* A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.
|
||||||
*
|
* \image html Biquad.gif "Single Biquad filter stage"
|
||||||
* The functions operate on blocks of input and output data and each call to the function
|
* Coefficients <code>b0, b1 and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
|
||||||
* processes <code>blockSize</code> samples through the filter.
|
* Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
|
||||||
* <code>pSrc</code> points to the array of input data and
|
* Pay careful attention to the sign of the feedback coefficients.
|
||||||
* <code>pDst</code> points to the array of output data.
|
* Some design tools use the difference equation
|
||||||
* Both arrays contain <code>blockSize</code> values.
|
* <pre>
|
||||||
*
|
* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]
|
||||||
* \par Algorithm
|
* </pre>
|
||||||
* Each Biquad stage implements a second order filter using the difference equation:
|
* In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
|
||||||
* <pre>
|
*
|
||||||
* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
* \par
|
||||||
* </pre>
|
* Higher order filters are realized as a cascade of second order sections.
|
||||||
* A Direct Form I algorithm is used with 5 coefficients and 4 state variables per stage.
|
* <code>numStages</code> refers to the number of second order stages used.
|
||||||
* \image html Biquad.gif "Single Biquad filter stage"
|
* For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
|
||||||
* Coefficients <code>b0, b1 and b2 </code> multiply the input signal <code>x[n]</code> and are referred to as the feedforward coefficients.
|
* \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"
|
||||||
* Coefficients <code>a1</code> and <code>a2</code> multiply the output signal <code>y[n]</code> and are referred to as the feedback coefficients.
|
* A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
|
||||||
* Pay careful attention to the sign of the feedback coefficients.
|
*
|
||||||
* Some design tools use the difference equation
|
* \par
|
||||||
* <pre>
|
* The <code>pState</code> points to state variables array.
|
||||||
* y[n] = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] - a1 * y[n-1] - a2 * y[n-2]
|
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||||
* </pre>
|
* The state variables are arranged in the <code>pState</code> array as:
|
||||||
* In this case the feedback coefficients <code>a1</code> and <code>a2</code> must be negated when used with the CMSIS DSP Library.
|
* <pre>
|
||||||
*
|
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||||
* \par
|
* </pre>
|
||||||
* Higher order filters are realized as a cascade of second order sections.
|
*
|
||||||
* <code>numStages</code> refers to the number of second order stages used.
|
* \par
|
||||||
* For example, an 8th order filter would be realized with <code>numStages=4</code> second order stages.
|
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||||
* \image html BiquadCascade.gif "8th order filter using a cascade of Biquad stages"
|
* The state array has a total length of <code>4*numStages</code> values.
|
||||||
* A 9th order filter would be realized with <code>numStages=5</code> second order stages with the coefficients for one of the stages configured as a first order filter (<code>b2=0</code> and <code>a2=0</code>).
|
* The state variables are updated after each block of data is processed, the coefficients are untouched.
|
||||||
*
|
*
|
||||||
* \par
|
* \par Instance Structure
|
||||||
* The <code>pState</code> points to state variables array.
|
* The coefficients and state variables for a filter are stored together in an instance data structure.
|
||||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
* A separate instance structure must be defined for each filter.
|
||||||
* The state variables are arranged in the <code>pState</code> array as:
|
* Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
|
||||||
* <pre>
|
* There are separate instance structure declarations for each of the 3 supported data types.
|
||||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
*
|
||||||
* </pre>
|
* \par Init Functions
|
||||||
*
|
* There is also an associated initialization function for each data type.
|
||||||
* \par
|
* The initialization function performs the following operations:
|
||||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
* - Sets the values of the internal structure fields.
|
||||||
* The state array has a total length of <code>4*numStages</code> values.
|
* - Zeros out the values in the state buffer.
|
||||||
* The state variables are updated after each block of data is processed, the coefficients are untouched.
|
* To do this manually without calling the init function, assign the following subfields of the instance structure:
|
||||||
*
|
* numStages, pCoeffs, pState. Also set all of the values in pState to zero.
|
||||||
* \par Instance Structure
|
*
|
||||||
* The coefficients and state variables for a filter are stored together in an instance data structure.
|
* \par
|
||||||
* A separate instance structure must be defined for each filter.
|
* Use of the initialization function is optional.
|
||||||
* Coefficient arrays may be shared among several instances while state variable arrays cannot be shared.
|
* However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
|
||||||
* There are separate instance structure declarations for each of the 3 supported data types.
|
* To place an instance structure into a const data section, the instance structure must be manually initialized.
|
||||||
*
|
* Set the values in the state buffer to zeros before static initialization.
|
||||||
* \par Init Functions
|
* The code below statically initializes each of the 3 different data type filter instance structures
|
||||||
* There is also an associated initialization function for each data type.
|
* <pre>
|
||||||
* The initialization function performs the following operations:
|
* arm_biquad_casd_df1_inst_f32 S1 = {numStages, pState, pCoeffs};
|
||||||
* - Sets the values of the internal structure fields.
|
* arm_biquad_casd_df1_inst_q15 S2 = {numStages, pState, pCoeffs, postShift};
|
||||||
* - Zeros out the values in the state buffer.
|
* arm_biquad_casd_df1_inst_q31 S3 = {numStages, pState, pCoeffs, postShift};
|
||||||
* To do this manually without calling the init function, assign the following subfields of the instance structure:
|
* </pre>
|
||||||
* numStages, pCoeffs, pState. Also set all of the values in pState to zero.
|
* where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;
|
||||||
*
|
* <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> shift to be applied.
|
||||||
* \par
|
*
|
||||||
* Use of the initialization function is optional.
|
* \par Fixed-Point Behavior
|
||||||
* However, if the initialization function is used, then the instance structure cannot be placed into a const data section.
|
* Care must be taken when using the Q15 and Q31 versions of the Biquad Cascade filter functions.
|
||||||
* To place an instance structure into a const data section, the instance structure must be manually initialized.
|
* Following issues must be considered:
|
||||||
* Set the values in the state buffer to zeros before static initialization.
|
* - Scaling of coefficients
|
||||||
* The code below statically initializes each of the 3 different data type filter instance structures
|
* - Filter gain
|
||||||
* <pre>
|
* - Overflow and saturation
|
||||||
* arm_biquad_casd_df1_inst_f32 S1 = {numStages, pState, pCoeffs};
|
*
|
||||||
* arm_biquad_casd_df1_inst_q15 S2 = {numStages, pState, pCoeffs, postShift};
|
* \par
|
||||||
* arm_biquad_casd_df1_inst_q31 S3 = {numStages, pState, pCoeffs, postShift};
|
* <b>Scaling of coefficients: </b>
|
||||||
* </pre>
|
* Filter coefficients are represented as fractional values and
|
||||||
* where <code>numStages</code> is the number of Biquad stages in the filter; <code>pState</code> is the address of the state buffer;
|
* coefficients are restricted to lie in the range <code>[-1 +1)</code>.
|
||||||
* <code>pCoeffs</code> is the address of the coefficient buffer; <code>postShift</code> shift to be applied.
|
* The fixed-point functions have an additional scaling parameter <code>postShift</code>
|
||||||
*
|
* which allows the filter coefficients to exceed the range <code>[-1 +1)</code>.
|
||||||
* \par Fixed-Point Behavior
|
* At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.
|
||||||
* Care must be taken when using the Q15 and Q31 versions of the Biquad Cascade filter functions.
|
* \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"
|
||||||
* Following issues must be considered:
|
* This essentially scales the filter coefficients by <code>2^postShift</code>.
|
||||||
* - Scaling of coefficients
|
* For example, to realize the coefficients
|
||||||
* - Filter gain
|
* <pre>
|
||||||
* - Overflow and saturation
|
* {1.5, -0.8, 1.2, 1.6, -0.9}
|
||||||
*
|
* </pre>
|
||||||
* \par
|
* set the pCoeffs array to:
|
||||||
* <b>Scaling of coefficients: </b>
|
* <pre>
|
||||||
* Filter coefficients are represented as fractional values and
|
* {0.75, -0.4, 0.6, 0.8, -0.45}
|
||||||
* coefficients are restricted to lie in the range <code>[-1 +1)</code>.
|
* </pre>
|
||||||
* The fixed-point functions have an additional scaling parameter <code>postShift</code>
|
* and set <code>postShift=1</code>
|
||||||
* which allows the filter coefficients to exceed the range <code>[-1 +1)</code>.
|
*
|
||||||
* At the output of the filter's accumulator is a shift register which shifts the result by <code>postShift</code> bits.
|
* \par
|
||||||
* \image html BiquadPostshift.gif "Fixed-point Biquad with shift by postShift bits after accumulator"
|
* <b>Filter gain: </b>
|
||||||
* This essentially scales the filter coefficients by <code>2^postShift</code>.
|
* The frequency response of a Biquad filter is a function of its coefficients.
|
||||||
* For example, to realize the coefficients
|
* It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.
|
||||||
* <pre>
|
* This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.
|
||||||
* {1.5, -0.8, 1.2, 1.6, -0.9}
|
* To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter are never overflowed.
|
||||||
* </pre>
|
*
|
||||||
* set the pCoeffs array to:
|
* \par
|
||||||
* <pre>
|
* <b>Overflow and saturation: </b>
|
||||||
* {0.75, -0.4, 0.6, 0.8, -0.45}
|
* For Q15 and Q31 versions, it is described separately as part of the function specific documentation below.
|
||||||
* </pre>
|
*/
|
||||||
* and set <code>postShift=1</code>
|
|
||||||
*
|
/**
|
||||||
* \par
|
* @addtogroup BiquadCascadeDF1
|
||||||
* <b>Filter gain: </b>
|
* @{
|
||||||
* The frequency response of a Biquad filter is a function of its coefficients.
|
*/
|
||||||
* It is possible for the gain through the filter to exceed 1.0 meaning that the filter increases the amplitude of certain frequencies.
|
|
||||||
* This means that an input signal with amplitude < 1.0 may result in an output > 1.0 and these are saturated or overflowed based on the implementation of the filter.
|
/**
|
||||||
* To avoid this behavior the filter needs to be scaled down such that its peak gain < 1.0 or the input signal must be scaled down so that the combination of input and filter are never overflowed.
|
* @param[in] *S points to an instance of the floating-point Biquad cascade structure.
|
||||||
*
|
* @param[in] *pSrc points to the block of input data.
|
||||||
* \par
|
* @param[out] *pDst points to the block of output data.
|
||||||
* <b>Overflow and saturation: </b>
|
* @param[in] blockSize number of samples to process per call.
|
||||||
* For Q15 and Q31 versions, it is described separately as part of the function specific documentation below.
|
* @return none.
|
||||||
*/
|
*
|
||||||
|
*/
|
||||||
/**
|
|
||||||
* @addtogroup BiquadCascadeDF1
|
void arm_biquad_cascade_df1_f32(
|
||||||
* @{
|
const arm_biquad_casd_df1_inst_f32 * S,
|
||||||
*/
|
float32_t * pSrc,
|
||||||
|
float32_t * pDst,
|
||||||
/**
|
uint32_t blockSize)
|
||||||
* @param[in] *S points to an instance of the floating-point Biquad cascade structure.
|
{
|
||||||
* @param[in] *pSrc points to the block of input data.
|
float32_t *pIn = pSrc; /* source pointer */
|
||||||
* @param[out] *pDst points to the block of output data.
|
float32_t *pOut = pDst; /* destination pointer */
|
||||||
* @param[in] blockSize number of samples to process per call.
|
float32_t *pState = S->pState; /* pState pointer */
|
||||||
* @return none.
|
float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */
|
||||||
*
|
float32_t acc; /* Simulates the accumulator */
|
||||||
*/
|
float32_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||||
|
float32_t Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */
|
||||||
void arm_biquad_cascade_df1_f32(
|
float32_t Xn; /* temporary input */
|
||||||
const arm_biquad_casd_df1_inst_f32 * S,
|
uint32_t sample, stage = S->numStages; /* loop counters */
|
||||||
float32_t * pSrc,
|
|
||||||
float32_t * pDst,
|
|
||||||
uint32_t blockSize)
|
#if defined (ARM_MATH_DSP)
|
||||||
{
|
|
||||||
float32_t *pIn = pSrc; /* source pointer */
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
float32_t *pOut = pDst; /* destination pointer */
|
|
||||||
float32_t *pState = S->pState; /* pState pointer */
|
do
|
||||||
float32_t *pCoeffs = S->pCoeffs; /* coefficient pointer */
|
{
|
||||||
float32_t acc; /* Simulates the accumulator */
|
/* Reading the coefficients */
|
||||||
float32_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
b0 = *pCoeffs++;
|
||||||
float32_t Xn1, Xn2, Yn1, Yn2; /* Filter pState variables */
|
b1 = *pCoeffs++;
|
||||||
float32_t Xn; /* temporary input */
|
b2 = *pCoeffs++;
|
||||||
uint32_t sample, stage = S->numStages; /* loop counters */
|
a1 = *pCoeffs++;
|
||||||
|
a2 = *pCoeffs++;
|
||||||
|
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
/* Reading the pState values */
|
||||||
|
Xn1 = pState[0];
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
Xn2 = pState[1];
|
||||||
|
Yn1 = pState[2];
|
||||||
do
|
Yn2 = pState[3];
|
||||||
{
|
|
||||||
/* Reading the coefficients */
|
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
||||||
b0 = *pCoeffs++;
|
/* The variable acc holds the output values that are being computed:
|
||||||
b1 = *pCoeffs++;
|
*
|
||||||
b2 = *pCoeffs++;
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
a1 = *pCoeffs++;
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
a2 = *pCoeffs++;
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
/* Reading the pState values */
|
*/
|
||||||
Xn1 = pState[0];
|
|
||||||
Xn2 = pState[1];
|
sample = blockSize >> 2U;
|
||||||
Yn1 = pState[2];
|
|
||||||
Yn2 = pState[3];
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
while (sample > 0U)
|
||||||
/* The variable acc holds the output values that are being computed:
|
{
|
||||||
*
|
/* Read the first input */
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
Xn = *pIn++;
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
Yn2 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
|
||||||
*/
|
|
||||||
|
/* Store the result in the accumulator in the destination buffer. */
|
||||||
sample = blockSize >> 2u;
|
*pOut++ = Yn2;
|
||||||
|
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
/* Every time after the output is computed state should be updated. */
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* The states should be updated as: */
|
||||||
while(sample > 0u)
|
/* Xn2 = Xn1 */
|
||||||
{
|
/* Xn1 = Xn */
|
||||||
/* Read the first input */
|
/* Yn2 = Yn1 */
|
||||||
Xn = *pIn++;
|
/* Yn1 = acc */
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* Read the second input */
|
||||||
Yn2 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
|
Xn2 = *pIn++;
|
||||||
|
|
||||||
/* Store the result in the accumulator in the destination buffer. */
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
*pOut++ = Yn2;
|
Yn1 = (b0 * Xn2) + (b1 * Xn) + (b2 * Xn1) + (a1 * Yn2) + (a2 * Yn1);
|
||||||
|
|
||||||
/* Every time after the output is computed state should be updated. */
|
/* Store the result in the accumulator in the destination buffer. */
|
||||||
/* The states should be updated as: */
|
*pOut++ = Yn1;
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
/* Every time after the output is computed state should be updated. */
|
||||||
/* Yn2 = Yn1 */
|
/* The states should be updated as: */
|
||||||
/* Yn1 = acc */
|
/* Xn2 = Xn1 */
|
||||||
|
/* Xn1 = Xn */
|
||||||
/* Read the second input */
|
/* Yn2 = Yn1 */
|
||||||
Xn2 = *pIn++;
|
/* Yn1 = acc */
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* Read the third input */
|
||||||
Yn1 = (b0 * Xn2) + (b1 * Xn) + (b2 * Xn1) + (a1 * Yn2) + (a2 * Yn1);
|
Xn1 = *pIn++;
|
||||||
|
|
||||||
/* Store the result in the accumulator in the destination buffer. */
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
*pOut++ = Yn1;
|
Yn2 = (b0 * Xn1) + (b1 * Xn2) + (b2 * Xn) + (a1 * Yn1) + (a2 * Yn2);
|
||||||
|
|
||||||
/* Every time after the output is computed state should be updated. */
|
/* Store the result in the accumulator in the destination buffer. */
|
||||||
/* The states should be updated as: */
|
*pOut++ = Yn2;
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
/* Every time after the output is computed state should be updated. */
|
||||||
/* Yn2 = Yn1 */
|
/* The states should be updated as: */
|
||||||
/* Yn1 = acc */
|
/* Xn2 = Xn1 */
|
||||||
|
/* Xn1 = Xn */
|
||||||
/* Read the third input */
|
/* Yn2 = Yn1 */
|
||||||
Xn1 = *pIn++;
|
/* Yn1 = acc */
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* Read the fourth input */
|
||||||
Yn2 = (b0 * Xn1) + (b1 * Xn2) + (b2 * Xn) + (a1 * Yn1) + (a2 * Yn2);
|
Xn = *pIn++;
|
||||||
|
|
||||||
/* Store the result in the accumulator in the destination buffer. */
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
*pOut++ = Yn2;
|
Yn1 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn2) + (a2 * Yn1);
|
||||||
|
|
||||||
/* Every time after the output is computed state should be updated. */
|
/* Store the result in the accumulator in the destination buffer. */
|
||||||
/* The states should be updated as: */
|
*pOut++ = Yn1;
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
/* Every time after the output is computed state should be updated. */
|
||||||
/* Yn2 = Yn1 */
|
/* The states should be updated as: */
|
||||||
/* Yn1 = acc */
|
/* Xn2 = Xn1 */
|
||||||
|
/* Xn1 = Xn */
|
||||||
/* Read the fourth input */
|
/* Yn2 = Yn1 */
|
||||||
Xn = *pIn++;
|
/* Yn1 = acc */
|
||||||
|
Xn2 = Xn1;
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
Xn1 = Xn;
|
||||||
Yn1 = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn2) + (a2 * Yn1);
|
|
||||||
|
/* decrement the loop counter */
|
||||||
/* Store the result in the accumulator in the destination buffer. */
|
sample--;
|
||||||
*pOut++ = Yn1;
|
|
||||||
|
}
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* Xn2 = Xn1 */
|
** No loop unrolling is used. */
|
||||||
/* Xn1 = Xn */
|
sample = blockSize & 0x3U;
|
||||||
/* Yn2 = Yn1 */
|
|
||||||
/* Yn1 = acc */
|
while (sample > 0U)
|
||||||
Xn2 = Xn1;
|
{
|
||||||
Xn1 = Xn;
|
/* Read the input */
|
||||||
|
Xn = *pIn++;
|
||||||
/* decrement the loop counter */
|
|
||||||
sample--;
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
|
acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
|
||||||
}
|
|
||||||
|
/* Store the result in the accumulator in the destination buffer. */
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
*pOut++ = acc;
|
||||||
** No loop unrolling is used. */
|
|
||||||
sample = blockSize & 0x3u;
|
/* Every time after the output is computed state should be updated. */
|
||||||
|
/* The states should be updated as: */
|
||||||
while(sample > 0u)
|
/* Xn2 = Xn1 */
|
||||||
{
|
/* Xn1 = Xn */
|
||||||
/* Read the input */
|
/* Yn2 = Yn1 */
|
||||||
Xn = *pIn++;
|
/* Yn1 = acc */
|
||||||
|
Xn2 = Xn1;
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
Xn1 = Xn;
|
||||||
acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
|
Yn2 = Yn1;
|
||||||
|
Yn1 = acc;
|
||||||
/* Store the result in the accumulator in the destination buffer. */
|
|
||||||
*pOut++ = acc;
|
/* decrement the loop counter */
|
||||||
|
sample--;
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
}
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
/* Store the updated state variables back into the pState array */
|
||||||
/* Yn2 = Yn1 */
|
*pState++ = Xn1;
|
||||||
/* Yn1 = acc */
|
*pState++ = Xn2;
|
||||||
Xn2 = Xn1;
|
*pState++ = Yn1;
|
||||||
Xn1 = Xn;
|
*pState++ = Yn2;
|
||||||
Yn2 = Yn1;
|
|
||||||
Yn1 = acc;
|
/* The first stage goes from the input buffer to the output buffer. */
|
||||||
|
/* Subsequent numStages occur in-place in the output buffer */
|
||||||
/* decrement the loop counter */
|
pIn = pDst;
|
||||||
sample--;
|
|
||||||
|
/* Reset the output pointer */
|
||||||
}
|
pOut = pDst;
|
||||||
|
|
||||||
/* Store the updated state variables back into the pState array */
|
/* decrement the loop counter */
|
||||||
*pState++ = Xn1;
|
stage--;
|
||||||
*pState++ = Xn2;
|
|
||||||
*pState++ = Yn1;
|
} while (stage > 0U);
|
||||||
*pState++ = Yn2;
|
|
||||||
|
#else
|
||||||
/* The first stage goes from the input buffer to the output buffer. */
|
|
||||||
/* Subsequent numStages occur in-place in the output buffer */
|
/* Run the below code for Cortex-M0 */
|
||||||
pIn = pDst;
|
|
||||||
|
do
|
||||||
/* Reset the output pointer */
|
{
|
||||||
pOut = pDst;
|
/* Reading the coefficients */
|
||||||
|
b0 = *pCoeffs++;
|
||||||
/* decrement the loop counter */
|
b1 = *pCoeffs++;
|
||||||
stage--;
|
b2 = *pCoeffs++;
|
||||||
|
a1 = *pCoeffs++;
|
||||||
} while(stage > 0u);
|
a2 = *pCoeffs++;
|
||||||
|
|
||||||
#else
|
/* Reading the pState values */
|
||||||
|
Xn1 = pState[0];
|
||||||
/* Run the below code for Cortex-M0 */
|
Xn2 = pState[1];
|
||||||
|
Yn1 = pState[2];
|
||||||
do
|
Yn2 = pState[3];
|
||||||
{
|
|
||||||
/* Reading the coefficients */
|
/* The variable acc holds the output value that is computed:
|
||||||
b0 = *pCoeffs++;
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
b1 = *pCoeffs++;
|
*/
|
||||||
b2 = *pCoeffs++;
|
|
||||||
a1 = *pCoeffs++;
|
sample = blockSize;
|
||||||
a2 = *pCoeffs++;
|
|
||||||
|
while (sample > 0U)
|
||||||
/* Reading the pState values */
|
{
|
||||||
Xn1 = pState[0];
|
/* Read the input */
|
||||||
Xn2 = pState[1];
|
Xn = *pIn++;
|
||||||
Yn1 = pState[2];
|
|
||||||
Yn2 = pState[3];
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
|
acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
|
||||||
/* The variable acc holds the output value that is computed:
|
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
/* Store the result in the accumulator in the destination buffer. */
|
||||||
*/
|
*pOut++ = acc;
|
||||||
|
|
||||||
sample = blockSize;
|
/* Every time after the output is computed state should be updated. */
|
||||||
|
/* The states should be updated as: */
|
||||||
while(sample > 0u)
|
/* Xn2 = Xn1 */
|
||||||
{
|
/* Xn1 = Xn */
|
||||||
/* Read the input */
|
/* Yn2 = Yn1 */
|
||||||
Xn = *pIn++;
|
/* Yn1 = acc */
|
||||||
|
Xn2 = Xn1;
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
Xn1 = Xn;
|
||||||
acc = (b0 * Xn) + (b1 * Xn1) + (b2 * Xn2) + (a1 * Yn1) + (a2 * Yn2);
|
Yn2 = Yn1;
|
||||||
|
Yn1 = acc;
|
||||||
/* Store the result in the accumulator in the destination buffer. */
|
|
||||||
*pOut++ = acc;
|
/* decrement the loop counter */
|
||||||
|
sample--;
|
||||||
/* Every time after the output is computed state should be updated. */
|
}
|
||||||
/* The states should be updated as: */
|
|
||||||
/* Xn2 = Xn1 */
|
/* Store the updated state variables back into the pState array */
|
||||||
/* Xn1 = Xn */
|
*pState++ = Xn1;
|
||||||
/* Yn2 = Yn1 */
|
*pState++ = Xn2;
|
||||||
/* Yn1 = acc */
|
*pState++ = Yn1;
|
||||||
Xn2 = Xn1;
|
*pState++ = Yn2;
|
||||||
Xn1 = Xn;
|
|
||||||
Yn2 = Yn1;
|
/* The first stage goes from the input buffer to the output buffer. */
|
||||||
Yn1 = acc;
|
/* Subsequent numStages occur in-place in the output buffer */
|
||||||
|
pIn = pDst;
|
||||||
/* decrement the loop counter */
|
|
||||||
sample--;
|
/* Reset the output pointer */
|
||||||
}
|
pOut = pDst;
|
||||||
|
|
||||||
/* Store the updated state variables back into the pState array */
|
/* decrement the loop counter */
|
||||||
*pState++ = Xn1;
|
stage--;
|
||||||
*pState++ = Xn2;
|
|
||||||
*pState++ = Yn1;
|
} while (stage > 0U);
|
||||||
*pState++ = Yn2;
|
|
||||||
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
/* The first stage goes from the input buffer to the output buffer. */
|
|
||||||
/* Subsequent numStages occur in-place in the output buffer */
|
}
|
||||||
pIn = pDst;
|
|
||||||
|
|
||||||
/* Reset the output pointer */
|
/**
|
||||||
pOut = pDst;
|
* @} end of BiquadCascadeDF1 group
|
||||||
|
*/
|
||||||
/* decrement the loop counter */
|
|
||||||
stage--;
|
|
||||||
|
|
||||||
} while(stage > 0u);
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF1 group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,286 +1,273 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df1_fast_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Fast processing function for the Q15 Biquad cascade filter
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df1_fast_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Fast processing function for the
|
* -------------------------------------------------------------------- */
|
||||||
* Q15 Biquad cascade filter.
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @ingroup groupFilters
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
*/
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @addtogroup BiquadCascadeDF1
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @{
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*/
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* -------------------------------------------------------------------- */
|
/**
|
||||||
|
* @details
|
||||||
#include "arm_math.h"
|
* @param[in] *S points to an instance of the Q15 Biquad cascade structure.
|
||||||
|
* @param[in] *pSrc points to the block of input data.
|
||||||
/**
|
* @param[out] *pDst points to the block of output data.
|
||||||
* @ingroup groupFilters
|
* @param[in] blockSize number of samples to process per call.
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @addtogroup BiquadCascadeDF1
|
* \par
|
||||||
* @{
|
* This fast version uses a 32-bit accumulator with 2.30 format.
|
||||||
*/
|
* The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.
|
||||||
|
* Thus, if the accumulator result overflows it wraps around and distorts the result.
|
||||||
/**
|
* In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).
|
||||||
* @details
|
* The 2.30 accumulator is then shifted by <code>postShift</code> bits and the result truncated to 1.15 format by discarding the low 16 bits.
|
||||||
* @param[in] *S points to an instance of the Q15 Biquad cascade structure.
|
*
|
||||||
* @param[in] *pSrc points to the block of input data.
|
* \par
|
||||||
* @param[out] *pDst points to the block of output data.
|
* Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure.
|
||||||
* @param[in] blockSize number of samples to process per call.
|
* Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure.
|
||||||
* @return none.
|
*
|
||||||
*
|
*/
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
|
||||||
* \par
|
void arm_biquad_cascade_df1_fast_q15(
|
||||||
* This fast version uses a 32-bit accumulator with 2.30 format.
|
const arm_biquad_casd_df1_inst_q15 * S,
|
||||||
* The accumulator maintains full precision of the intermediate multiplication results but provides only a single guard bit.
|
q15_t * pSrc,
|
||||||
* Thus, if the accumulator result overflows it wraps around and distorts the result.
|
q15_t * pDst,
|
||||||
* In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25).
|
uint32_t blockSize)
|
||||||
* The 2.30 accumulator is then shifted by <code>postShift</code> bits and the result truncated to 1.15 format by discarding the low 16 bits.
|
{
|
||||||
*
|
q15_t *pIn = pSrc; /* Source pointer */
|
||||||
* \par
|
q15_t *pOut = pDst; /* Destination pointer */
|
||||||
* Refer to the function <code>arm_biquad_cascade_df1_q15()</code> for a slower implementation of this function which uses 64-bit accumulation to avoid wrap around distortion. Both the slow and the fast versions use the same instance structure.
|
q31_t in; /* Temporary variable to hold input value */
|
||||||
* Use the function <code>arm_biquad_cascade_df1_init_q15()</code> to initialize the filter structure.
|
q31_t out; /* Temporary variable to hold output value */
|
||||||
*
|
q31_t b0; /* Temporary variable to hold b0 value */
|
||||||
*/
|
q31_t b1, a1; /* Filter coefficients */
|
||||||
|
q31_t state_in, state_out; /* Filter state variables */
|
||||||
void arm_biquad_cascade_df1_fast_q15(
|
q31_t acc; /* Accumulator */
|
||||||
const arm_biquad_casd_df1_inst_q15 * S,
|
int32_t shift = (int32_t) (15 - S->postShift); /* Post shift */
|
||||||
q15_t * pSrc,
|
q15_t *pState = S->pState; /* State pointer */
|
||||||
q15_t * pDst,
|
q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
|
||||||
uint32_t blockSize)
|
uint32_t sample, stage = S->numStages; /* Stage loop counter */
|
||||||
{
|
|
||||||
q15_t *pIn = pSrc; /* Source pointer */
|
|
||||||
q15_t *pOut = pDst; /* Destination pointer */
|
|
||||||
q31_t in; /* Temporary variable to hold input value */
|
do
|
||||||
q31_t out; /* Temporary variable to hold output value */
|
{
|
||||||
q31_t b0; /* Temporary variable to hold bo value */
|
|
||||||
q31_t b1, a1; /* Filter coefficients */
|
/* Read the b0 and 0 coefficients using SIMD */
|
||||||
q31_t state_in, state_out; /* Filter state variables */
|
b0 = *__SIMD32(pCoeffs)++;
|
||||||
q31_t acc; /* Accumulator */
|
|
||||||
int32_t shift = (int32_t) (15 - S->postShift); /* Post shift */
|
/* Read the b1 and b2 coefficients using SIMD */
|
||||||
q15_t *pState = S->pState; /* State pointer */
|
b1 = *__SIMD32(pCoeffs)++;
|
||||||
q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
|
|
||||||
uint32_t sample, stage = S->numStages; /* Stage loop counter */
|
/* Read the a1 and a2 coefficients using SIMD */
|
||||||
|
a1 = *__SIMD32(pCoeffs)++;
|
||||||
|
|
||||||
|
/* Read the input state values from the state buffer: x[n-1], x[n-2] */
|
||||||
do
|
state_in = *__SIMD32(pState)++;
|
||||||
{
|
|
||||||
|
/* Read the output state values from the state buffer: y[n-1], y[n-2] */
|
||||||
/* Read the b0 and 0 coefficients using SIMD */
|
state_out = *__SIMD32(pState)--;
|
||||||
b0 = *__SIMD32(pCoeffs)++;
|
|
||||||
|
/* Apply loop unrolling and compute 2 output values simultaneously. */
|
||||||
/* Read the b1 and b2 coefficients using SIMD */
|
/* The variable acc holds the output values that are being computed:
|
||||||
b1 = *__SIMD32(pCoeffs)++;
|
*
|
||||||
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
/* Read the a1 and a2 coefficients using SIMD */
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
a1 = *__SIMD32(pCoeffs)++;
|
*/
|
||||||
|
sample = blockSize >> 1U;
|
||||||
/* Read the input state values from the state buffer: x[n-1], x[n-2] */
|
|
||||||
state_in = *__SIMD32(pState)++;
|
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 sample. */
|
||||||
/* Read the output state values from the state buffer: y[n-1], y[n-2] */
|
while (sample > 0U)
|
||||||
state_out = *__SIMD32(pState)--;
|
{
|
||||||
|
|
||||||
/* Apply loop unrolling and compute 2 output values simultaneously. */
|
/* Read the input */
|
||||||
/* The variable acc hold output values that are being computed:
|
in = *__SIMD32(pIn)++;
|
||||||
*
|
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
/* out = b0 * x[n] + 0 * 0 */
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
out = __SMUAD(b0, in);
|
||||||
*/
|
/* acc = b1 * x[n-1] + acc += b2 * x[n-2] + out */
|
||||||
sample = blockSize >> 1u;
|
acc = __SMLAD(b1, state_in, out);
|
||||||
|
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
|
||||||
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
|
acc = __SMLAD(a1, state_out, acc);
|
||||||
** a second loop below computes the remaining 1 sample. */
|
|
||||||
while(sample > 0u)
|
/* The 2.30 accumulator is shifted down to 1.15 format and then saturation to 16 bits is applied */
|
||||||
{
|
out = __SSAT((acc >> shift), 16);
|
||||||
|
|
||||||
/* Read the input */
|
/* Every time after the output is computed state should be updated. */
|
||||||
in = *__SIMD32(pIn)++;
|
/* The states should be updated as: */
|
||||||
|
/* Xn2 = Xn1 */
|
||||||
/* out = b0 * x[n] + 0 * 0 */
|
/* Xn1 = Xn */
|
||||||
out = __SMUAD(b0, in);
|
/* Yn2 = Yn1 */
|
||||||
/* acc = b1 * x[n-1] + acc += b2 * x[n-2] + out */
|
/* Yn1 = acc */
|
||||||
acc = __SMLAD(b1, state_in, out);
|
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||||
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
|
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||||
acc = __SMLAD(a1, state_out, acc);
|
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
/* The result is converted from 3.29 to 1.31 and then saturation is applied */
|
|
||||||
out = __SSAT((acc >> shift), 16);
|
state_in = __PKHBT(in, state_in, 16);
|
||||||
|
state_out = __PKHBT(out, state_out, 16);
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
#else
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
|
||||||
/* Yn2 = Yn1 */
|
state_out = __PKHBT(state_out >> 16, (out), 16);
|
||||||
/* Yn1 = acc */
|
|
||||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
|
||||||
|
/* out = b0 * x[n] + 0 * 0 */
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
out = __SMUADX(b0, in);
|
||||||
|
/* acc0 = b1 * x[n-1] , acc0 += b2 * x[n-2] + out */
|
||||||
state_in = __PKHBT(in, state_in, 16);
|
acc = __SMLAD(b1, state_in, out);
|
||||||
state_out = __PKHBT(out, state_out, 16);
|
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
|
||||||
|
acc = __SMLAD(a1, state_out, acc);
|
||||||
#else
|
|
||||||
|
/* The 2.30 accumulator is shifted down to 1.15 format and then saturation to 16 bits is applied */
|
||||||
state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
|
out = __SSAT((acc >> shift), 16);
|
||||||
state_out = __PKHBT(state_out >> 16, (out), 16);
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
/* Store the output in the destination buffer. */
|
||||||
|
|
||||||
/* out = b0 * x[n] + 0 * 0 */
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
out = __SMUADX(b0, in);
|
|
||||||
/* acc0 = b1 * x[n-1] , acc0 += b2 * x[n-2] + out */
|
*__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
|
||||||
acc = __SMLAD(b1, state_in, out);
|
|
||||||
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
|
#else
|
||||||
acc = __SMLAD(a1, state_out, acc);
|
|
||||||
|
*__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
|
||||||
/* The result is converted from 3.29 to 1.31 and then saturation is applied */
|
|
||||||
out = __SSAT((acc >> shift), 16);
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
|
|
||||||
|
/* Every time after the output is computed state should be updated. */
|
||||||
/* Store the output in the destination buffer. */
|
/* The states should be updated as: */
|
||||||
|
/* Xn2 = Xn1 */
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
/* Xn1 = Xn */
|
||||||
|
/* Yn2 = Yn1 */
|
||||||
*__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
|
/* Yn1 = acc */
|
||||||
|
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||||
#else
|
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||||
|
|
||||||
*__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
state_in = __PKHBT(in >> 16, state_in, 16);
|
||||||
|
state_out = __PKHBT(out, state_out, 16);
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
#else
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
state_in = __PKHBT(state_in >> 16, in, 16);
|
||||||
/* Yn2 = Yn1 */
|
state_out = __PKHBT(state_out >> 16, out, 16);
|
||||||
/* Yn1 = acc */
|
|
||||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
|
||||||
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
/* Decrement the loop counter */
|
||||||
|
sample--;
|
||||||
state_in = __PKHBT(in >> 16, state_in, 16);
|
|
||||||
state_out = __PKHBT(out, state_out, 16);
|
}
|
||||||
|
|
||||||
#else
|
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
state_in = __PKHBT(state_in >> 16, in, 16);
|
|
||||||
state_out = __PKHBT(state_out >> 16, out, 16);
|
if ((blockSize & 0x1U) != 0U)
|
||||||
|
{
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
/* Read the input */
|
||||||
|
in = *pIn++;
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* out = b0 * x[n] + 0 * 0 */
|
||||||
sample--;
|
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
}
|
|
||||||
|
out = __SMUAD(b0, in);
|
||||||
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
#else
|
||||||
|
|
||||||
if((blockSize & 0x1u) != 0u)
|
out = __SMUADX(b0, in);
|
||||||
{
|
|
||||||
/* Read the input */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
in = *pIn++;
|
|
||||||
|
/* acc = b1 * x[n-1], acc += b2 * x[n-2] + out */
|
||||||
/* out = b0 * x[n] + 0 * 0 */
|
acc = __SMLAD(b1, state_in, out);
|
||||||
|
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
acc = __SMLAD(a1, state_out, acc);
|
||||||
|
|
||||||
out = __SMUAD(b0, in);
|
/* The 2.30 accumulator is shifted down to 1.15 format and then saturation to 16 bits is applied */
|
||||||
|
out = __SSAT((acc >> shift), 16);
|
||||||
#else
|
|
||||||
|
/* Store the output in the destination buffer. */
|
||||||
out = __SMUADX(b0, in);
|
*pOut++ = (q15_t) out;
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
/* Every time after the output is computed state should be updated. */
|
||||||
|
/* The states should be updated as: */
|
||||||
/* acc = b1 * x[n-1], acc += b2 * x[n-2] + out */
|
/* Xn2 = Xn1 */
|
||||||
acc = __SMLAD(b1, state_in, out);
|
/* Xn1 = Xn */
|
||||||
/* acc += a1 * y[n-1] + acc += a2 * y[n-2] */
|
/* Yn2 = Yn1 */
|
||||||
acc = __SMLAD(a1, state_out, acc);
|
/* Yn1 = acc */
|
||||||
|
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||||
/* The result is converted from 3.29 to 1.31 and then saturation is applied */
|
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||||
out = __SSAT((acc >> shift), 16);
|
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
/* Store the output in the destination buffer. */
|
|
||||||
*pOut++ = (q15_t) out;
|
state_in = __PKHBT(in, state_in, 16);
|
||||||
|
state_out = __PKHBT(out, state_out, 16);
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
#else
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
state_in = __PKHBT(state_in >> 16, in, 16);
|
||||||
/* Yn2 = Yn1 */
|
state_out = __PKHBT(state_out >> 16, out, 16);
|
||||||
/* Yn1 = acc */
|
|
||||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
|
||||||
|
}
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
|
||||||
|
/* The first stage goes from the input buffer to the output buffer. */
|
||||||
state_in = __PKHBT(in, state_in, 16);
|
/* Subsequent (numStages - 1) occur in-place in the output buffer */
|
||||||
state_out = __PKHBT(out, state_out, 16);
|
pIn = pDst;
|
||||||
|
|
||||||
#else
|
/* Reset the output pointer */
|
||||||
|
pOut = pDst;
|
||||||
state_in = __PKHBT(state_in >> 16, in, 16);
|
|
||||||
state_out = __PKHBT(state_out >> 16, out, 16);
|
/* Store the updated state variables back into the state array */
|
||||||
|
*__SIMD32(pState)++ = state_in;
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
*__SIMD32(pState)++ = state_out;
|
||||||
|
|
||||||
}
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* The first stage goes from the input buffer to the output buffer. */
|
stage--;
|
||||||
/* Subsequent (numStages - 1) occur in-place in the output buffer */
|
|
||||||
pIn = pDst;
|
} while (stage > 0U);
|
||||||
|
}
|
||||||
/* Reset the output pointer */
|
|
||||||
pOut = pDst;
|
|
||||||
|
/**
|
||||||
/* Store the updated state variables back into the state array */
|
* @} end of BiquadCascadeDF1 group
|
||||||
*__SIMD32(pState)++ = state_in;
|
*/
|
||||||
*__SIMD32(pState)++ = state_out;
|
|
||||||
|
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
|
||||||
stage--;
|
|
||||||
|
|
||||||
} while(stage > 0u);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF1 group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,305 +1,292 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df1_fast_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Processing function for the Q31 Fast Biquad cascade DirectFormI(DF1) filter
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df1_fast_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Processing function for the
|
* -------------------------------------------------------------------- */
|
||||||
* Q31 Fast Biquad cascade DirectFormI(DF1) filter.
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @ingroup groupFilters
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
*/
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @addtogroup BiquadCascadeDF1
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @{
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*/
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* -------------------------------------------------------------------- */
|
/**
|
||||||
|
* @details
|
||||||
#include "arm_math.h"
|
*
|
||||||
|
* @param[in] *S points to an instance of the Q31 Biquad cascade structure.
|
||||||
/**
|
* @param[in] *pSrc points to the block of input data.
|
||||||
* @ingroup groupFilters
|
* @param[out] *pDst points to the block of output data.
|
||||||
*/
|
* @param[in] blockSize number of samples to process per call.
|
||||||
|
* @return none.
|
||||||
/**
|
*
|
||||||
* @addtogroup BiquadCascadeDF1
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* This function is optimized for speed at the expense of fixed-point precision and overflow protection.
|
||||||
|
* The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.
|
||||||
/**
|
* These intermediate results are added to a 2.30 accumulator.
|
||||||
* @details
|
* Finally, the accumulator is saturated and converted to a 1.31 result.
|
||||||
*
|
* The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.
|
||||||
* @param[in] *S points to an instance of the Q31 Biquad cascade structure.
|
* In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the initialization function
|
||||||
* @param[in] *pSrc points to the block of input data.
|
* arm_biquad_cascade_df1_init_q31() to initialize filter structure.
|
||||||
* @param[out] *pDst points to the block of output data.
|
*
|
||||||
* @param[in] blockSize number of samples to process per call.
|
* \par
|
||||||
* @return none.
|
* Refer to the function <code>arm_biquad_cascade_df1_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision. Both the slow and the fast versions use the same instance structure.
|
||||||
*
|
* Use the function <code>arm_biquad_cascade_df1_init_q31()</code> to initialize the filter structure.
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
*/
|
||||||
* \par
|
|
||||||
* This function is optimized for speed at the expense of fixed-point precision and overflow protection.
|
void arm_biquad_cascade_df1_fast_q31(
|
||||||
* The result of each 1.31 x 1.31 multiplication is truncated to 2.30 format.
|
const arm_biquad_casd_df1_inst_q31 * S,
|
||||||
* These intermediate results are added to a 2.30 accumulator.
|
q31_t * pSrc,
|
||||||
* Finally, the accumulator is saturated and converted to a 1.31 result.
|
q31_t * pDst,
|
||||||
* The fast version has the same overflow behavior as the standard version and provides less precision since it discards the low 32 bits of each multiplication result.
|
uint32_t blockSize)
|
||||||
* In order to avoid overflows completely the input signal must be scaled down by two bits and lie in the range [-0.25 +0.25). Use the intialization function
|
{
|
||||||
* arm_biquad_cascade_df1_init_q31() to initialize filter structure.
|
q31_t acc = 0; /* accumulator */
|
||||||
*
|
q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
|
||||||
* \par
|
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||||
* Refer to the function <code>arm_biquad_cascade_df1_q31()</code> for a slower implementation of this function which uses 64-bit accumulation to provide higher precision. Both the slow and the fast versions use the same instance structure.
|
q31_t *pIn = pSrc; /* input pointer initialization */
|
||||||
* Use the function <code>arm_biquad_cascade_df1_init_q31()</code> to initialize the filter structure.
|
q31_t *pOut = pDst; /* output pointer initialization */
|
||||||
*/
|
q31_t *pState = S->pState; /* pState pointer initialization */
|
||||||
|
q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
|
||||||
void arm_biquad_cascade_df1_fast_q31(
|
q31_t Xn; /* temporary input */
|
||||||
const arm_biquad_casd_df1_inst_q31 * S,
|
int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */
|
||||||
q31_t * pSrc,
|
uint32_t sample, stage = S->numStages; /* loop counters */
|
||||||
q31_t * pDst,
|
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
do
|
||||||
q31_t acc = 0; /* accumulator */
|
{
|
||||||
q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
|
/* Reading the coefficients */
|
||||||
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
b0 = *pCoeffs++;
|
||||||
q31_t *pIn = pSrc; /* input pointer initialization */
|
b1 = *pCoeffs++;
|
||||||
q31_t *pOut = pDst; /* output pointer initialization */
|
b2 = *pCoeffs++;
|
||||||
q31_t *pState = S->pState; /* pState pointer initialization */
|
a1 = *pCoeffs++;
|
||||||
q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
|
a2 = *pCoeffs++;
|
||||||
q31_t Xn; /* temporary input */
|
|
||||||
int32_t shift = (int32_t) S->postShift + 1; /* Shift to be applied to the output */
|
/* Reading the state values */
|
||||||
uint32_t sample, stage = S->numStages; /* loop counters */
|
Xn1 = pState[0];
|
||||||
|
Xn2 = pState[1];
|
||||||
|
Yn1 = pState[2];
|
||||||
do
|
Yn2 = pState[3];
|
||||||
{
|
|
||||||
/* Reading the coefficients */
|
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
||||||
b0 = *pCoeffs++;
|
/* The variables acc ... acc3 hold output values that are being computed:
|
||||||
b1 = *pCoeffs++;
|
*
|
||||||
b2 = *pCoeffs++;
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
a1 = *pCoeffs++;
|
*/
|
||||||
a2 = *pCoeffs++;
|
|
||||||
|
sample = blockSize >> 2U;
|
||||||
/* Reading the state values */
|
|
||||||
Xn1 = pState[0];
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
Xn2 = pState[1];
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
Yn1 = pState[2];
|
while (sample > 0U)
|
||||||
Yn2 = pState[3];
|
{
|
||||||
|
/* Read the input */
|
||||||
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
Xn = *pIn;
|
||||||
/* The variables acc ... acc3 hold output values that are being computed:
|
|
||||||
*
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
/* acc = b1 * x[n-1] */
|
||||||
*/
|
/*acc = (q31_t) (((q63_t) b1 * Xn1) >> 32);*/
|
||||||
|
mult_32x32_keep32_R(acc, b1, Xn1);
|
||||||
sample = blockSize >> 2u;
|
/* acc += b0 * x[n] */
|
||||||
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b0 * (Xn))) >> 32);*/
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
multAcc_32x32_keep32_R(acc, b0, Xn);
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
/* acc += b[2] * x[n-2] */
|
||||||
while(sample > 0u)
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);*/
|
||||||
{
|
multAcc_32x32_keep32_R(acc, b2, Xn2);
|
||||||
/* Read the input */
|
/* acc += a1 * y[n-1] */
|
||||||
Xn = *pIn;
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);*/
|
||||||
|
multAcc_32x32_keep32_R(acc, a1, Yn1);
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* acc += a2 * y[n-2] */
|
||||||
/* acc = b0 * x[n] */
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);*/
|
||||||
//acc = (q31_t) (((q63_t) b1 * Xn1) >> 32);
|
multAcc_32x32_keep32_R(acc, a2, Yn2);
|
||||||
mult_32x32_keep32_R(acc, b1, Xn1);
|
|
||||||
/* acc += b1 * x[n-1] */
|
/* The result is converted to 1.31 , Yn2 variable is reused */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b0 * (Xn))) >> 32);
|
Yn2 = acc << shift;
|
||||||
multAcc_32x32_keep32_R(acc, b0, Xn);
|
|
||||||
/* acc += b[2] * x[n-2] */
|
/* Read the second input */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
|
Xn2 = *(pIn + 1U);
|
||||||
multAcc_32x32_keep32_R(acc, b2, Xn2);
|
|
||||||
/* acc += a1 * y[n-1] */
|
/* Store the output in the destination buffer. */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
|
*pOut = Yn2;
|
||||||
multAcc_32x32_keep32_R(acc, a1, Yn1);
|
|
||||||
/* acc += a2 * y[n-2] */
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
|
/* acc = b0 * x[n] */
|
||||||
multAcc_32x32_keep32_R(acc, a2, Yn2);
|
/*acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32);*/
|
||||||
|
mult_32x32_keep32_R(acc, b0, Xn2);
|
||||||
/* The result is converted to 1.31 , Yn2 variable is reused */
|
/* acc += b1 * x[n-1] */
|
||||||
Yn2 = acc << shift;
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn))) >> 32);*/
|
||||||
|
multAcc_32x32_keep32_R(acc, b1, Xn);
|
||||||
/* Read the second input */
|
/* acc += b[2] * x[n-2] */
|
||||||
Xn2 = *(pIn + 1u);
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn1))) >> 32);*/
|
||||||
|
multAcc_32x32_keep32_R(acc, b2, Xn1);
|
||||||
/* Store the output in the destination buffer. */
|
/* acc += a1 * y[n-1] */
|
||||||
*pOut = Yn2;
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);*/
|
||||||
|
multAcc_32x32_keep32_R(acc, a1, Yn2);
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* acc += a2 * y[n-2] */
|
||||||
/* acc = b0 * x[n] */
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);*/
|
||||||
//acc = (q31_t) (((q63_t) b0 * (Xn2)) >> 32);
|
multAcc_32x32_keep32_R(acc, a2, Yn1);
|
||||||
mult_32x32_keep32_R(acc, b0, Xn2);
|
|
||||||
/* acc += b1 * x[n-1] */
|
/* The result is converted to 1.31, Yn1 variable is reused */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn))) >> 32);
|
Yn1 = acc << shift;
|
||||||
multAcc_32x32_keep32_R(acc, b1, Xn);
|
|
||||||
/* acc += b[2] * x[n-2] */
|
/* Read the third input */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn1))) >> 32);
|
Xn1 = *(pIn + 2U);
|
||||||
multAcc_32x32_keep32_R(acc, b2, Xn1);
|
|
||||||
/* acc += a1 * y[n-1] */
|
/* Store the output in the destination buffer. */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);
|
*(pOut + 1U) = Yn1;
|
||||||
multAcc_32x32_keep32_R(acc, a1, Yn2);
|
|
||||||
/* acc += a2 * y[n-2] */
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);
|
/* acc = b0 * x[n] */
|
||||||
multAcc_32x32_keep32_R(acc, a2, Yn1);
|
/*acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32);*/
|
||||||
|
mult_32x32_keep32_R(acc, b0, Xn1);
|
||||||
/* The result is converted to 1.31, Yn1 variable is reused */
|
/* acc += b1 * x[n-1] */
|
||||||
Yn1 = acc << shift;
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn2))) >> 32);*/
|
||||||
|
multAcc_32x32_keep32_R(acc, b1, Xn2);
|
||||||
/* Read the third input */
|
/* acc += b[2] * x[n-2] */
|
||||||
Xn1 = *(pIn + 2u);
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn))) >> 32);*/
|
||||||
|
multAcc_32x32_keep32_R(acc, b2, Xn);
|
||||||
/* Store the output in the destination buffer. */
|
/* acc += a1 * y[n-1] */
|
||||||
*(pOut + 1u) = Yn1;
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);*/
|
||||||
|
multAcc_32x32_keep32_R(acc, a1, Yn1);
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* acc += a2 * y[n-2] */
|
||||||
/* acc = b0 * x[n] */
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);*/
|
||||||
//acc = (q31_t) (((q63_t) b0 * (Xn1)) >> 32);
|
multAcc_32x32_keep32_R(acc, a2, Yn2);
|
||||||
mult_32x32_keep32_R(acc, b0, Xn1);
|
|
||||||
/* acc += b1 * x[n-1] */
|
/* The result is converted to 1.31, Yn2 variable is reused */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn2))) >> 32);
|
Yn2 = acc << shift;
|
||||||
multAcc_32x32_keep32_R(acc, b1, Xn2);
|
|
||||||
/* acc += b[2] * x[n-2] */
|
/* Read the fourth input */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn))) >> 32);
|
Xn = *(pIn + 3U);
|
||||||
multAcc_32x32_keep32_R(acc, b2, Xn);
|
|
||||||
/* acc += a1 * y[n-1] */
|
/* Store the output in the destination buffer. */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
|
*(pOut + 2U) = Yn2;
|
||||||
multAcc_32x32_keep32_R(acc, a1, Yn1);
|
pIn += 4U;
|
||||||
/* acc += a2 * y[n-2] */
|
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
multAcc_32x32_keep32_R(acc, a2, Yn2);
|
/* acc = b0 * x[n] */
|
||||||
|
/*acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);*/
|
||||||
/* The result is converted to 1.31, Yn2 variable is reused */
|
mult_32x32_keep32_R(acc, b0, Xn);
|
||||||
Yn2 = acc << shift;
|
/* acc += b1 * x[n-1] */
|
||||||
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);*/
|
||||||
/* Read the fourth input */
|
multAcc_32x32_keep32_R(acc, b1, Xn1);
|
||||||
Xn = *(pIn + 3u);
|
/* acc += b[2] * x[n-2] */
|
||||||
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);*/
|
||||||
/* Store the output in the destination buffer. */
|
multAcc_32x32_keep32_R(acc, b2, Xn2);
|
||||||
*(pOut + 2u) = Yn2;
|
/* acc += a1 * y[n-1] */
|
||||||
pIn += 4u;
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);*/
|
||||||
|
multAcc_32x32_keep32_R(acc, a1, Yn2);
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* acc += a2 * y[n-2] */
|
||||||
/* acc = b0 * x[n] */
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);*/
|
||||||
//acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
|
multAcc_32x32_keep32_R(acc, a2, Yn1);
|
||||||
mult_32x32_keep32_R(acc, b0, Xn);
|
|
||||||
/* acc += b1 * x[n-1] */
|
/* Every time after the output is computed state should be updated. */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);
|
/* The states should be updated as: */
|
||||||
multAcc_32x32_keep32_R(acc, b1, Xn1);
|
/* Xn2 = Xn1 */
|
||||||
/* acc += b[2] * x[n-2] */
|
Xn2 = Xn1;
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
|
|
||||||
multAcc_32x32_keep32_R(acc, b2, Xn2);
|
/* The result is converted to 1.31, Yn1 variable is reused */
|
||||||
/* acc += a1 * y[n-1] */
|
Yn1 = acc << shift;
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn2))) >> 32);
|
|
||||||
multAcc_32x32_keep32_R(acc, a1, Yn2);
|
/* Xn1 = Xn */
|
||||||
/* acc += a2 * y[n-2] */
|
Xn1 = Xn;
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn1))) >> 32);
|
|
||||||
multAcc_32x32_keep32_R(acc, a2, Yn1);
|
/* Store the output in the destination buffer. */
|
||||||
|
*(pOut + 3U) = Yn1;
|
||||||
/* Every time after the output is computed state should be updated. */
|
pOut += 4U;
|
||||||
/* The states should be updated as: */
|
|
||||||
/* Xn2 = Xn1 */
|
/* decrement the loop counter */
|
||||||
Xn2 = Xn1;
|
sample--;
|
||||||
|
}
|
||||||
/* The result is converted to 1.31, Yn1 variable is reused */
|
|
||||||
Yn1 = acc << shift;
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
/* Xn1 = Xn */
|
sample = (blockSize & 0x3U);
|
||||||
Xn1 = Xn;
|
|
||||||
|
while (sample > 0U)
|
||||||
/* Store the output in the destination buffer. */
|
{
|
||||||
*(pOut + 3u) = Yn1;
|
/* Read the input */
|
||||||
pOut += 4u;
|
Xn = *pIn++;
|
||||||
|
|
||||||
/* decrement the loop counter */
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
sample--;
|
/* acc = b0 * x[n] */
|
||||||
}
|
/*acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);*/
|
||||||
|
mult_32x32_keep32_R(acc, b0, Xn);
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
/* acc += b1 * x[n-1] */
|
||||||
** No loop unrolling is used. */
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);*/
|
||||||
sample = (blockSize & 0x3u);
|
multAcc_32x32_keep32_R(acc, b1, Xn1);
|
||||||
|
/* acc += b[2] * x[n-2] */
|
||||||
while(sample > 0u)
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);*/
|
||||||
{
|
multAcc_32x32_keep32_R(acc, b2, Xn2);
|
||||||
/* Read the input */
|
/* acc += a1 * y[n-1] */
|
||||||
Xn = *pIn++;
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);*/
|
||||||
|
multAcc_32x32_keep32_R(acc, a1, Yn1);
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* acc += a2 * y[n-2] */
|
||||||
/* acc = b0 * x[n] */
|
/*acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);*/
|
||||||
//acc = (q31_t) (((q63_t) b0 * (Xn)) >> 32);
|
multAcc_32x32_keep32_R(acc, a2, Yn2);
|
||||||
mult_32x32_keep32_R(acc, b0, Xn);
|
|
||||||
/* acc += b1 * x[n-1] */
|
/* The result is converted to 1.31 */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b1 * (Xn1))) >> 32);
|
acc = acc << shift;
|
||||||
multAcc_32x32_keep32_R(acc, b1, Xn1);
|
|
||||||
/* acc += b[2] * x[n-2] */
|
/* Every time after the output is computed state should be updated. */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) b2 * (Xn2))) >> 32);
|
/* The states should be updated as: */
|
||||||
multAcc_32x32_keep32_R(acc, b2, Xn2);
|
/* Xn2 = Xn1 */
|
||||||
/* acc += a1 * y[n-1] */
|
/* Xn1 = Xn */
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a1 * (Yn1))) >> 32);
|
/* Yn2 = Yn1 */
|
||||||
multAcc_32x32_keep32_R(acc, a1, Yn1);
|
/* Yn1 = acc */
|
||||||
/* acc += a2 * y[n-2] */
|
Xn2 = Xn1;
|
||||||
//acc = (q31_t) ((((q63_t) acc << 32) + ((q63_t) a2 * (Yn2))) >> 32);
|
Xn1 = Xn;
|
||||||
multAcc_32x32_keep32_R(acc, a2, Yn2);
|
Yn2 = Yn1;
|
||||||
|
Yn1 = acc;
|
||||||
/* The result is converted to 1.31 */
|
|
||||||
acc = acc << shift;
|
/* Store the output in the destination buffer. */
|
||||||
|
*pOut++ = acc;
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
/* decrement the loop counter */
|
||||||
/* Xn2 = Xn1 */
|
sample--;
|
||||||
/* Xn1 = Xn */
|
}
|
||||||
/* Yn2 = Yn1 */
|
|
||||||
/* Yn1 = acc */
|
/* The first stage goes from the input buffer to the output buffer. */
|
||||||
Xn2 = Xn1;
|
/* Subsequent stages occur in-place in the output buffer */
|
||||||
Xn1 = Xn;
|
pIn = pDst;
|
||||||
Yn2 = Yn1;
|
|
||||||
Yn1 = acc;
|
/* Reset to destination pointer */
|
||||||
|
pOut = pDst;
|
||||||
/* Store the output in the destination buffer. */
|
|
||||||
*pOut++ = acc;
|
/* Store the updated state variables back into the pState array */
|
||||||
|
*pState++ = Xn1;
|
||||||
/* decrement the loop counter */
|
*pState++ = Xn2;
|
||||||
sample--;
|
*pState++ = Yn1;
|
||||||
}
|
*pState++ = Yn2;
|
||||||
|
|
||||||
/* The first stage goes from the input buffer to the output buffer. */
|
} while (--stage);
|
||||||
/* Subsequent stages occur in-place in the output buffer */
|
}
|
||||||
pIn = pDst;
|
|
||||||
|
/**
|
||||||
/* Reset to destination pointer */
|
* @} end of BiquadCascadeDF1 group
|
||||||
pOut = pDst;
|
*/
|
||||||
|
|
||||||
/* Store the updated state variables back into the pState array */
|
|
||||||
*pState++ = Xn1;
|
|
||||||
*pState++ = Xn2;
|
|
||||||
*pState++ = Yn1;
|
|
||||||
*pState++ = Yn2;
|
|
||||||
|
|
||||||
} while(--stage);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF1 group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,109 +1,97 @@
|
||||||
/*-----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df1_init_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Floating-point Biquad cascade DirectFormI(DF1) filter initialization function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df1_init_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: floating-point Biquad cascade DirectFormI(DF1) filter initialization function.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupFilters
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BiquadCascadeDF1
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* ---------------------------------------------------------------------------*/
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @details
|
||||||
|
* @brief Initialization function for the floating-point Biquad cascade filter.
|
||||||
/**
|
* @param[in,out] *S points to an instance of the floating-point Biquad cascade structure.
|
||||||
* @ingroup groupFilters
|
* @param[in] numStages number of 2nd order stages in the filter.
|
||||||
*/
|
* @param[in] *pCoeffs points to the filter coefficients array.
|
||||||
|
* @param[in] *pState points to the state array.
|
||||||
/**
|
* @return none
|
||||||
* @addtogroup BiquadCascadeDF1
|
*
|
||||||
* @{
|
*
|
||||||
*/
|
* <b>Coefficient and State Ordering:</b>
|
||||||
|
*
|
||||||
/**
|
* \par
|
||||||
* @details
|
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||||
* @brief Initialization function for the floating-point Biquad cascade filter.
|
* <pre>
|
||||||
* @param[in,out] *S points to an instance of the floating-point Biquad cascade structure.
|
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||||
* @param[in] numStages number of 2nd order stages in the filter.
|
* </pre>
|
||||||
* @param[in] *pCoeffs points to the filter coefficients array.
|
*
|
||||||
* @param[in] *pState points to the state array.
|
* \par
|
||||||
* @return none
|
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||||
*
|
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||||
*
|
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||||
* <b>Coefficient and State Ordering:</b>
|
*
|
||||||
*
|
* \par
|
||||||
* \par
|
* The <code>pState</code> is a pointer to state array.
|
||||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||||
* <pre>
|
* The state variables are arranged in the <code>pState</code> array as:
|
||||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
* <pre>
|
||||||
* </pre>
|
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||||
*
|
* </pre>
|
||||||
* \par
|
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
* The state array has a total length of <code>4*numStages</code> values.
|
||||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
*
|
||||||
*
|
*/
|
||||||
* \par
|
|
||||||
* The <code>pState</code> is a pointer to state array.
|
void arm_biquad_cascade_df1_init_f32(
|
||||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
arm_biquad_casd_df1_inst_f32 * S,
|
||||||
* The state variables are arranged in the <code>pState</code> array as:
|
uint8_t numStages,
|
||||||
* <pre>
|
float32_t * pCoeffs,
|
||||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
float32_t * pState)
|
||||||
* </pre>
|
{
|
||||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
/* Assign filter stages */
|
||||||
* The state array has a total length of <code>4*numStages</code> values.
|
S->numStages = numStages;
|
||||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
|
||||||
*
|
/* Assign coefficient pointer */
|
||||||
*/
|
S->pCoeffs = pCoeffs;
|
||||||
|
|
||||||
void arm_biquad_cascade_df1_init_f32(
|
/* Clear the state buffer; its size is always 4 * numStages */
|
||||||
arm_biquad_casd_df1_inst_f32 * S,
|
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(float32_t));
|
||||||
uint8_t numStages,
|
|
||||||
float32_t * pCoeffs,
|
/* Assign state pointer */
|
||||||
float32_t * pState)
|
S->pState = pState;
|
||||||
{
|
}
|
||||||
/* Assign filter stages */
|
|
||||||
S->numStages = numStages;
|
/**
|
||||||
|
* @} end of BiquadCascadeDF1 group
|
||||||
/* Assign coefficient pointer */
|
*/
|
||||||
S->pCoeffs = pCoeffs;
|
|
||||||
|
|
||||||
/* Clear the state buffer; its size is always 4 * numStages */
|
|
||||||
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(float32_t));
|
|
||||||
|
|
||||||
/* Assign state pointer */
|
|
||||||
S->pState = pState;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF1 group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,111 +1,99 @@
|
||||||
/*-----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df1_init_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q15 Biquad cascade DirectFormI(DF1) filter initialization function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df1_init_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q15 Biquad cascade DirectFormI(DF1) filter initialization function.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupFilters
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup BiquadCascadeDF1
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* ---------------------------------------------------------------------------*/
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @details
|
||||||
|
*
|
||||||
/**
|
* @param[in,out] *S points to an instance of the Q15 Biquad cascade structure.
|
||||||
* @ingroup groupFilters
|
* @param[in] numStages number of 2nd order stages in the filter.
|
||||||
*/
|
* @param[in] *pCoeffs points to the filter coefficients.
|
||||||
|
* @param[in] *pState points to the state buffer.
|
||||||
/**
|
* @param[in] postShift Shift to be applied to the accumulator result. Varies according to the coefficients format
|
||||||
* @addtogroup BiquadCascadeDF1
|
* @return none
|
||||||
* @{
|
*
|
||||||
*/
|
* <b>Coefficient and State Ordering:</b>
|
||||||
|
*
|
||||||
/**
|
* \par
|
||||||
* @details
|
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||||
*
|
* <pre>
|
||||||
* @param[in,out] *S points to an instance of the Q15 Biquad cascade structure.
|
* {b10, 0, b11, b12, a11, a12, b20, 0, b21, b22, a21, a22, ...}
|
||||||
* @param[in] numStages number of 2nd order stages in the filter.
|
* </pre>
|
||||||
* @param[in] *pCoeffs points to the filter coefficients.
|
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||||
* @param[in] *pState points to the state buffer.
|
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||||
* @param[in] postShift Shift to be applied to the accumulator result. Varies according to the coefficients format
|
* and so on. The <code>pCoeffs</code> array contains a total of <code>6*numStages</code> values.
|
||||||
* @return none
|
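* The zero coefficient inserted after the first coefficient of each stage (between <code>b10</code> and <code>b11</code>, <code>b20</code> and <code>b21</code>, ...) facilitates use of 16-bit SIMD instructions on the Cortex-M4.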
|
||||||
*
|
*
|
||||||
* <b>Coefficient and State Ordering:</b>
|
* \par
|
||||||
*
|
* The state variables are stored in the array <code>pState</code>.
|
||||||
* \par
|
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
* The state variables are arranged in the <code>pState</code> array as:
|
||||||
* <pre>
|
* <pre>
|
||||||
* {b10, 0, b11, b12, a11, a12, b20, 0, b21, b22, a21, a22, ...}
|
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||||
* </pre>
|
* </pre>
|
||||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
* The state array has a total length of <code>4*numStages</code> values.
|
||||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>6*numStages</code> values.
|
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||||
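* The zero coefficient inserted after the first coefficient of each stage (between <code>b10</code> and <code>b11</code>, <code>b20</code> and <code>b21</code>, ...) facilitates use of 16-bit SIMD instructions on the Cortex-M4.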
|
*/
|
||||||
*
|
|
||||||
* \par
|
void arm_biquad_cascade_df1_init_q15(
|
||||||
* The state variables are stored in the array <code>pState</code>.
|
arm_biquad_casd_df1_inst_q15 * S,
|
||||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
uint8_t numStages,
|
||||||
* The state variables are arranged in the <code>pState</code> array as:
|
q15_t * pCoeffs,
|
||||||
* <pre>
|
q15_t * pState,
|
||||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
int8_t postShift)
|
||||||
* </pre>
|
{
|
||||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
/* Assign filter stages */
|
||||||
* The state array has a total length of <code>4*numStages</code> values.
|
S->numStages = numStages;
|
||||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
|
||||||
*/
|
/* Assign postShift to be applied to the output */
|
||||||
|
S->postShift = postShift;
|
||||||
void arm_biquad_cascade_df1_init_q15(
|
|
||||||
arm_biquad_casd_df1_inst_q15 * S,
|
/* Assign coefficient pointer */
|
||||||
uint8_t numStages,
|
S->pCoeffs = pCoeffs;
|
||||||
q15_t * pCoeffs,
|
|
||||||
q15_t * pState,
|
/* Clear the state buffer; its size is always 4 * numStages */
|
||||||
int8_t postShift)
|
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q15_t));
|
||||||
{
|
|
||||||
/* Assign filter stages */
|
/* Assign state pointer */
|
||||||
S->numStages = numStages;
|
S->pState = pState;
|
||||||
|
}
|
||||||
/* Assign postShift to be applied to the output */
|
|
||||||
S->postShift = postShift;
|
/**
|
||||||
|
* @} end of BiquadCascadeDF1 group
|
||||||
/* Assign coefficient pointer */
|
*/
|
||||||
S->pCoeffs = pCoeffs;
|
|
||||||
|
|
||||||
/* Clear the state buffer; its size is always 4 * numStages */
|
|
||||||
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q15_t));
|
|
||||||
|
|
||||||
/* Assign state pointer */
|
|
||||||
S->pState = pState;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF1 group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,111 +1,98 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df1_init_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Q31 Biquad cascade DirectFormI(DF1) filter initialization function
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df1_init_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Q31 Biquad cascade DirectFormI(DF1) filter initialization function.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @ingroup groupFilters
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
*/
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @addtogroup BiquadCascadeDF1
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @{
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*/
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* -------------------------------------------------------------------- */
|
/**
|
||||||
|
* @details
|
||||||
#include "arm_math.h"
|
*
|
||||||
|
* @param[in,out] *S points to an instance of the Q31 Biquad cascade structure.
|
||||||
/**
|
* @param[in] numStages number of 2nd order stages in the filter.
|
||||||
* @ingroup groupFilters
|
* @param[in] *pCoeffs points to the filter coefficients buffer.
|
||||||
*/
|
* @param[in] *pState points to the state buffer.
|
||||||
|
* @param[in] postShift Shift to be applied after the accumulator. Varies according to the coefficients format
|
||||||
/**
|
* @return none
|
||||||
* @addtogroup BiquadCascadeDF1
|
*
|
||||||
* @{
|
* <b>Coefficient and State Ordering:</b>
|
||||||
*/
|
*
|
||||||
|
* \par
|
||||||
/**
|
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||||
* @details
|
* <pre>
|
||||||
*
|
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||||
* @param[in,out] *S points to an instance of the Q31 Biquad cascade structure.
|
* </pre>
|
||||||
* @param[in] numStages number of 2nd order stages in the filter.
|
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||||
* @param[in] *pCoeffs points to the filter coefficients buffer.
|
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||||
* @param[in] *pState points to the state buffer.
|
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||||
* @param[in] postShift Shift to be applied after the accumulator. Varies according to the coefficients format
|
*
|
||||||
* @return none
|
* \par
|
||||||
*
|
* The <code>pState</code> points to state variables array.
|
||||||
* <b>Coefficient and State Ordering:</b>
|
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
||||||
*
|
* The state variables are arranged in the <code>pState</code> array as:
|
||||||
* \par
|
* <pre>
|
||||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
||||||
* <pre>
|
* </pre>
|
||||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
||||||
* </pre>
|
* The state array has a total length of <code>4*numStages</code> values.
|
||||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
*/
|
||||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
|
||||||
*
|
void arm_biquad_cascade_df1_init_q31(
|
||||||
* \par
|
arm_biquad_casd_df1_inst_q31 * S,
|
||||||
* The <code>pState</code> points to state variables array.
|
uint8_t numStages,
|
||||||
* Each Biquad stage has 4 state variables <code>x[n-1], x[n-2], y[n-1],</code> and <code>y[n-2]</code>.
|
q31_t * pCoeffs,
|
||||||
* The state variables are arranged in the <code>pState</code> array as:
|
q31_t * pState,
|
||||||
* <pre>
|
int8_t postShift)
|
||||||
* {x[n-1], x[n-2], y[n-1], y[n-2]}
|
{
|
||||||
* </pre>
|
/* Assign filter stages */
|
||||||
* The 4 state variables for stage 1 are first, then the 4 state variables for stage 2, and so on.
|
S->numStages = numStages;
|
||||||
* The state array has a total length of <code>4*numStages</code> values.
|
|
||||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
/* Assign postShift to be applied to the output */
|
||||||
*/
|
S->postShift = postShift;
|
||||||
|
|
||||||
void arm_biquad_cascade_df1_init_q31(
|
/* Assign coefficient pointer */
|
||||||
arm_biquad_casd_df1_inst_q31 * S,
|
S->pCoeffs = pCoeffs;
|
||||||
uint8_t numStages,
|
|
||||||
q31_t * pCoeffs,
|
/* Clear the state buffer; its size is always 4 * numStages */
|
||||||
q31_t * pState,
|
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(q31_t));
|
||||||
int8_t postShift)
|
|
||||||
{
|
/* Assign state pointer */
|
||||||
/* Assign filter stages */
|
S->pState = pState;
|
||||||
S->numStages = numStages;
|
}
|
||||||
|
|
||||||
/* Assign postShift to be applied to the output */
|
/**
|
||||||
S->postShift = postShift;
|
* @} end of BiquadCascadeDF1 group
|
||||||
|
*/
|
||||||
/* Assign coefficient pointer */
|
|
||||||
S->pCoeffs = pCoeffs;
|
|
||||||
|
|
||||||
/* Clear the state buffer; its size is always 4 * numStages */
|
|
||||||
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(q31_t));
|
|
||||||
|
|
||||||
/* Assign state pointer */
|
|
||||||
S->pState = pState;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF1 group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,411 +1,398 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df1_q15.c
|
||||||
* $Date: 12. March 2014
|
* Description: Processing function for the Q15 Biquad cascade DirectFormI(DF1) filter
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df1_q15.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Processing function for the
|
* -------------------------------------------------------------------- */
|
||||||
* Q15 Biquad cascade DirectFormI(DF1) filter.
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @ingroup groupFilters
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
*/
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @addtogroup BiquadCascadeDF1
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @{
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*/
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* -------------------------------------------------------------------- */
|
/**
|
||||||
|
* @brief Processing function for the Q15 Biquad cascade filter.
|
||||||
#include "arm_math.h"
|
* @param[in] *S points to an instance of the Q15 Biquad cascade structure.
|
||||||
|
* @param[in] *pSrc points to the block of input data.
|
||||||
/**
|
* @param[out] *pDst points to the location where the output result is written.
|
||||||
* @ingroup groupFilters
|
* @param[in] blockSize number of samples to process per call.
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
*
|
||||||
* @addtogroup BiquadCascadeDF1
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @{
|
* \par
|
||||||
*/
|
* The function is implemented using a 64-bit internal accumulator.
|
||||||
|
* Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
|
||||||
/**
|
* The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
|
||||||
* @brief Processing function for the Q15 Biquad cascade filter.
|
* There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
|
||||||
* @param[in] *S points to an instance of the Q15 Biquad cascade structure.
|
* The accumulator is then shifted by <code>postShift</code> bits to truncate the result to 1.15 format by discarding the low 16 bits.
|
||||||
* @param[in] *pSrc points to the block of input data.
|
* Finally, the result is saturated to 1.15 format.
|
||||||
* @param[out] *pDst points to the location where the output result is written.
|
*
|
||||||
* @param[in] blockSize number of samples to process per call.
|
* \par
|
||||||
* @return none.
|
* Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
|
||||||
*
|
*/
|
||||||
*
|
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
void arm_biquad_cascade_df1_q15(
|
||||||
* \par
|
const arm_biquad_casd_df1_inst_q15 * S,
|
||||||
* The function is implemented using a 64-bit internal accumulator.
|
q15_t * pSrc,
|
||||||
* Both coefficients and state variables are represented in 1.15 format and multiplications yield a 2.30 result.
|
q15_t * pDst,
|
||||||
* The 2.30 intermediate results are accumulated in a 64-bit accumulator in 34.30 format.
|
uint32_t blockSize)
|
||||||
* There is no risk of internal overflow with this approach and the full precision of intermediate multiplications is preserved.
|
{
|
||||||
* The accumulator is then shifted by <code>postShift</code> bits to truncate the result to 1.15 format by discarding the low 16 bits.
|
|
||||||
* Finally, the result is saturated to 1.15 format.
|
|
||||||
*
|
#if defined (ARM_MATH_DSP)
|
||||||
* \par
|
|
||||||
* Refer to the function <code>arm_biquad_cascade_df1_fast_q15()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
*/
|
|
||||||
|
q15_t *pIn = pSrc; /* Source pointer */
|
||||||
void arm_biquad_cascade_df1_q15(
|
q15_t *pOut = pDst; /* Destination pointer */
|
||||||
const arm_biquad_casd_df1_inst_q15 * S,
|
q31_t in; /* Temporary variable to hold input value */
|
||||||
q15_t * pSrc,
|
q31_t out; /* Temporary variable to hold output value */
|
||||||
q15_t * pDst,
|
q31_t b0; /* Temporary variable to hold b0 value */
|
||||||
uint32_t blockSize)
|
q31_t b1, a1; /* Filter coefficients */
|
||||||
{
|
q31_t state_in, state_out; /* Filter state variables */
|
||||||
|
q31_t acc_l, acc_h;
|
||||||
|
q63_t acc; /* Accumulator */
|
||||||
#ifndef ARM_MATH_CM0_FAMILY
|
int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
|
||||||
|
q15_t *pState = S->pState; /* State pointer */
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
|
||||||
|
uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
|
||||||
q15_t *pIn = pSrc; /* Source pointer */
|
int32_t uShift = (32 - lShift);
|
||||||
q15_t *pOut = pDst; /* Destination pointer */
|
|
||||||
q31_t in; /* Temporary variable to hold input value */
|
do
|
||||||
q31_t out; /* Temporary variable to hold output value */
|
{
|
||||||
q31_t b0; /* Temporary variable to hold b0 value */
|
/* Read the b0 and 0 coefficients using SIMD */
|
||||||
q31_t b1, a1; /* Filter coefficients */
|
b0 = *__SIMD32(pCoeffs)++;
|
||||||
q31_t state_in, state_out; /* Filter state variables */
|
|
||||||
q31_t acc_l, acc_h;
|
/* Read the b1 and b2 coefficients using SIMD */
|
||||||
q63_t acc; /* Accumulator */
|
b1 = *__SIMD32(pCoeffs)++;
|
||||||
int32_t lShift = (15 - (int32_t) S->postShift); /* Post shift */
|
|
||||||
q15_t *pState = S->pState; /* State pointer */
|
/* Read the a1 and a2 coefficients using SIMD */
|
||||||
q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
|
a1 = *__SIMD32(pCoeffs)++;
|
||||||
uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
|
|
||||||
int32_t uShift = (32 - lShift);
|
/* Read the input state values from the state buffer: x[n-1], x[n-2] */
|
||||||
|
state_in = *__SIMD32(pState)++;
|
||||||
do
|
|
||||||
{
|
/* Read the output state values from the state buffer: y[n-1], y[n-2] */
|
||||||
/* Read the b0 and 0 coefficients using SIMD */
|
state_out = *__SIMD32(pState)--;
|
||||||
b0 = *__SIMD32(pCoeffs)++;
|
|
||||||
|
/* Apply loop unrolling and compute 2 output values simultaneously. */
|
||||||
/* Read the b1 and b2 coefficients using SIMD */
|
/* The variable acc holds output values that are being computed:
|
||||||
b1 = *__SIMD32(pCoeffs)++;
|
*
|
||||||
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
/* Read the a1 and a2 coefficients using SIMD */
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
a1 = *__SIMD32(pCoeffs)++;
|
*/
|
||||||
|
sample = blockSize >> 1U;
|
||||||
/* Read the input state values from the state buffer: x[n-1], x[n-2] */
|
|
||||||
state_in = *__SIMD32(pState)++;
|
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
|
||||||
|
** a second loop below computes the remaining 1 sample. */
|
||||||
/* Read the output state values from the state buffer: y[n-1], y[n-2] */
|
while (sample > 0U)
|
||||||
state_out = *__SIMD32(pState)--;
|
{
|
||||||
|
|
||||||
/* Apply loop unrolling and compute 2 output values simultaneously. */
|
/* Read the input */
|
||||||
/* The variable acc holds output values that are being computed:
|
in = *__SIMD32(pIn)++;
|
||||||
*
|
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
/* out = b0 * x[n] + 0 * 0 */
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
out = __SMUAD(b0, in);
|
||||||
*/
|
|
||||||
sample = blockSize >> 1u;
|
/* acc += b1 * x[n-1] + b2 * x[n-2] + out */
|
||||||
|
acc = __SMLALD(b1, state_in, out);
|
||||||
/* First part of the processing with loop unrolling. Compute 2 outputs at a time.
|
/* acc += a1 * y[n-1] + a2 * y[n-2] */
|
||||||
** a second loop below computes the remaining 1 sample. */
|
acc = __SMLALD(a1, state_out, acc);
|
||||||
while(sample > 0u)
|
|
||||||
{
|
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
|
||||||
|
/* Calc lower part of acc */
|
||||||
/* Read the input */
|
acc_l = acc & 0xffffffff;
|
||||||
in = *__SIMD32(pIn)++;
|
|
||||||
|
/* Calc upper part of acc */
|
||||||
/* out = b0 * x[n] + 0 * 0 */
|
acc_h = (acc >> 32) & 0xffffffff;
|
||||||
out = __SMUAD(b0, in);
|
|
||||||
|
/* Apply shift for lower part of acc and upper part of acc */
|
||||||
/* acc += b1 * x[n-1] + b2 * x[n-2] + out */
|
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||||
acc = __SMLALD(b1, state_in, out);
|
|
||||||
/* acc += a1 * y[n-1] + a2 * y[n-2] */
|
out = __SSAT(out, 16);
|
||||||
acc = __SMLALD(a1, state_out, acc);
|
|
||||||
|
/* Every time after the output is computed state should be updated. */
|
||||||
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
|
/* The states should be updated as: */
|
||||||
/* Calc lower part of acc */
|
/* Xn2 = Xn1 */
|
||||||
acc_l = acc & 0xffffffff;
|
/* Xn1 = Xn */
|
||||||
|
/* Yn2 = Yn1 */
|
||||||
/* Calc upper part of acc */
|
/* Yn1 = acc */
|
||||||
acc_h = (acc >> 32) & 0xffffffff;
|
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||||
|
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||||
/* Apply shift for lower part of acc and upper part of acc */
|
|
||||||
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
|
|
||||||
out = __SSAT(out, 16);
|
state_in = __PKHBT(in, state_in, 16);
|
||||||
|
state_out = __PKHBT(out, state_out, 16);
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
#else
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
|
||||||
/* Yn2 = Yn1 */
|
state_out = __PKHBT(state_out >> 16, (out), 16);
|
||||||
/* Yn1 = acc */
|
|
||||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
|
||||||
|
/* out = b0 * x[n] + 0 * 0 */
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
out = __SMUADX(b0, in);
|
||||||
|
/* acc += b1 * x[n-1] + b2 * x[n-2] + out */
|
||||||
state_in = __PKHBT(in, state_in, 16);
|
acc = __SMLALD(b1, state_in, out);
|
||||||
state_out = __PKHBT(out, state_out, 16);
|
/* acc += a1 * y[n-1] + a2 * y[n-2] */
|
||||||
|
acc = __SMLALD(a1, state_out, acc);
|
||||||
#else
|
|
||||||
|
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
|
||||||
state_in = __PKHBT(state_in >> 16, (in >> 16), 16);
|
/* Calc lower part of acc */
|
||||||
state_out = __PKHBT(state_out >> 16, (out), 16);
|
acc_l = acc & 0xffffffff;
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
/* Calc upper part of acc */
|
||||||
|
acc_h = (acc >> 32) & 0xffffffff;
|
||||||
/* out = b0 * x[n] + 0 * 0 */
|
|
||||||
out = __SMUADX(b0, in);
|
/* Apply shift for lower part of acc and upper part of acc */
|
||||||
/* acc += b1 * x[n-1] + b2 * x[n-2] + out */
|
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||||
acc = __SMLALD(b1, state_in, out);
|
|
||||||
/* acc += a1 * y[n-1] + a2 * y[n-2] */
|
out = __SSAT(out, 16);
|
||||||
acc = __SMLALD(a1, state_out, acc);
|
|
||||||
|
/* Store the output in the destination buffer. */
|
||||||
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
|
|
||||||
/* Calc lower part of acc */
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
acc_l = acc & 0xffffffff;
|
|
||||||
|
*__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
|
||||||
/* Calc upper part of acc */
|
|
||||||
acc_h = (acc >> 32) & 0xffffffff;
|
#else
|
||||||
|
|
||||||
/* Apply shift for lower part of acc and upper part of acc */
|
*__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
|
||||||
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
|
||||||
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
out = __SSAT(out, 16);
|
|
||||||
|
/* Every time after the output is computed state should be updated. */
|
||||||
/* Store the output in the destination buffer. */
|
/* The states should be updated as: */
|
||||||
|
/* Xn2 = Xn1 */
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
/* Xn1 = Xn */
|
||||||
|
/* Yn2 = Yn1 */
|
||||||
*__SIMD32(pOut)++ = __PKHBT(state_out, out, 16);
|
/* Yn1 = acc */
|
||||||
|
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||||
#else
|
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
*__SIMD32(pOut)++ = __PKHBT(out, state_out >> 16, 16);
|
|
||||||
|
state_in = __PKHBT(in >> 16, state_in, 16);
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
state_out = __PKHBT(out, state_out, 16);
|
||||||
|
|
||||||
/* Every time after the output is computed state should be updated. */
|
#else
|
||||||
/* The states should be updated as: */
|
|
||||||
/* Xn2 = Xn1 */
|
state_in = __PKHBT(state_in >> 16, in, 16);
|
||||||
/* Xn1 = Xn */
|
state_out = __PKHBT(state_out >> 16, out, 16);
|
||||||
/* Yn2 = Yn1 */
|
|
||||||
/* Yn1 = acc */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
|
||||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
/* Decrement the loop counter */
|
||||||
|
sample--;
|
||||||
state_in = __PKHBT(in >> 16, state_in, 16);
|
|
||||||
state_out = __PKHBT(out, state_out, 16);
|
}
|
||||||
|
|
||||||
#else
|
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
|
||||||
|
** No loop unrolling is used. */
|
||||||
state_in = __PKHBT(state_in >> 16, in, 16);
|
|
||||||
state_out = __PKHBT(state_out >> 16, out, 16);
|
if ((blockSize & 0x1U) != 0U)
|
||||||
|
{
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
/* Read the input */
|
||||||
|
in = *pIn++;
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
/* out = b0 * x[n] + 0 * 0 */
|
||||||
sample--;
|
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
}
|
|
||||||
|
out = __SMUAD(b0, in);
|
||||||
/* If the blockSize is not a multiple of 2, compute any remaining output samples here.
|
|
||||||
** No loop unrolling is used. */
|
#else
|
||||||
|
|
||||||
if((blockSize & 0x1u) != 0u)
|
out = __SMUADX(b0, in);
|
||||||
{
|
|
||||||
/* Read the input */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
in = *pIn++;
|
|
||||||
|
/* acc = b1 * x[n-1] + b2 * x[n-2] + out */
|
||||||
/* out = b0 * x[n] + 0 * 0 */
|
acc = __SMLALD(b1, state_in, out);
|
||||||
|
/* acc += a1 * y[n-1] + a2 * y[n-2] */
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
acc = __SMLALD(a1, state_out, acc);
|
||||||
|
|
||||||
out = __SMUAD(b0, in);
|
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
|
||||||
|
/* Calc lower part of acc */
|
||||||
#else
|
acc_l = acc & 0xffffffff;
|
||||||
|
|
||||||
out = __SMUADX(b0, in);
|
/* Calc upper part of acc */
|
||||||
|
acc_h = (acc >> 32) & 0xffffffff;
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
|
||||||
|
/* Apply shift for lower part of acc and upper part of acc */
|
||||||
/* acc = b1 * x[n-1] + b2 * x[n-2] + out */
|
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||||
acc = __SMLALD(b1, state_in, out);
|
|
||||||
/* acc += a1 * y[n-1] + a2 * y[n-2] */
|
out = __SSAT(out, 16);
|
||||||
acc = __SMLALD(a1, state_out, acc);
|
|
||||||
|
/* Store the output in the destination buffer. */
|
||||||
/* The result is converted from 3.29 to 1.31 if postShift = 1, and then saturation is applied */
|
*pOut++ = (q15_t) out;
|
||||||
/* Calc lower part of acc */
|
|
||||||
acc_l = acc & 0xffffffff;
|
/* Every time after the output is computed state should be updated. */
|
||||||
|
/* The states should be updated as: */
|
||||||
/* Calc upper part of acc */
|
/* Xn2 = Xn1 */
|
||||||
acc_h = (acc >> 32) & 0xffffffff;
|
/* Xn1 = Xn */
|
||||||
|
/* Yn2 = Yn1 */
|
||||||
/* Apply shift for lower part of acc and upper part of acc */
|
/* Yn1 = acc */
|
||||||
out = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
||||||
|
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
||||||
out = __SSAT(out, 16);
|
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
/* Store the output in the destination buffer. */
|
|
||||||
*pOut++ = (q15_t) out;
|
state_in = __PKHBT(in, state_in, 16);
|
||||||
|
state_out = __PKHBT(out, state_out, 16);
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
#else
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
state_in = __PKHBT(state_in >> 16, in, 16);
|
||||||
/* Yn2 = Yn1 */
|
state_out = __PKHBT(state_out >> 16, out, 16);
|
||||||
/* Yn1 = acc */
|
|
||||||
/* x[n-N], x[n-N-1] are packed together to make state_in of type q31 */
|
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
||||||
/* y[n-N], y[n-N-1] are packed together to make state_out of type q31 */
|
|
||||||
|
}
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
|
||||||
|
/* The first stage goes from the input wire to the output wire. */
|
||||||
state_in = __PKHBT(in, state_in, 16);
|
/* Subsequent numStages occur in-place in the output wire */
|
||||||
state_out = __PKHBT(out, state_out, 16);
|
pIn = pDst;
|
||||||
|
|
||||||
#else
|
/* Reset the output pointer */
|
||||||
|
pOut = pDst;
|
||||||
state_in = __PKHBT(state_in >> 16, in, 16);
|
|
||||||
state_out = __PKHBT(state_out >> 16, out, 16);
|
/* Store the updated state variables back into the state array */
|
||||||
|
*__SIMD32(pState)++ = state_in;
|
||||||
#endif /* #ifndef ARM_MATH_BIG_ENDIAN */
|
*__SIMD32(pState)++ = state_out;
|
||||||
|
|
||||||
}
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
/* The first stage goes from the input wire to the output wire. */
|
stage--;
|
||||||
/* Subsequent numStages occur in-place in the output wire */
|
|
||||||
pIn = pDst;
|
} while (stage > 0U);
|
||||||
|
|
||||||
/* Reset the output pointer */
|
#else
|
||||||
pOut = pDst;
|
|
||||||
|
/* Run the below code for Cortex-M0 */
|
||||||
/* Store the updated state variables back into the state array */
|
|
||||||
*__SIMD32(pState)++ = state_in;
|
q15_t *pIn = pSrc; /* Source pointer */
|
||||||
*__SIMD32(pState)++ = state_out;
|
q15_t *pOut = pDst; /* Destination pointer */
|
||||||
|
q15_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||||
|
q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
|
||||||
/* Decrement the loop counter */
|
q15_t Xn; /* temporary input */
|
||||||
stage--;
|
q63_t acc; /* Accumulator */
|
||||||
|
int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */
|
||||||
} while(stage > 0u);
|
q15_t *pState = S->pState; /* State pointer */
|
||||||
|
q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
|
||||||
#else
|
uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
|
||||||
|
|
||||||
/* Run the below code for Cortex-M0 */
|
do
|
||||||
|
{
|
||||||
q15_t *pIn = pSrc; /* Source pointer */
|
/* Reading the coefficients */
|
||||||
q15_t *pOut = pDst; /* Destination pointer */
|
b0 = *pCoeffs++;
|
||||||
q15_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
pCoeffs++; // skip the 0 coefficient
|
||||||
q15_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
|
b1 = *pCoeffs++;
|
||||||
q15_t Xn; /* temporary input */
|
b2 = *pCoeffs++;
|
||||||
q63_t acc; /* Accumulator */
|
a1 = *pCoeffs++;
|
||||||
int32_t shift = (15 - (int32_t) S->postShift); /* Post shift */
|
a2 = *pCoeffs++;
|
||||||
q15_t *pState = S->pState; /* State pointer */
|
|
||||||
q15_t *pCoeffs = S->pCoeffs; /* Coefficient pointer */
|
/* Reading the state values */
|
||||||
uint32_t sample, stage = (uint32_t) S->numStages; /* Stage loop counter */
|
Xn1 = pState[0];
|
||||||
|
Xn2 = pState[1];
|
||||||
do
|
Yn1 = pState[2];
|
||||||
{
|
Yn2 = pState[3];
|
||||||
/* Reading the coefficients */
|
|
||||||
b0 = *pCoeffs++;
|
/* The variable acc holds the output value that is computed:
|
||||||
pCoeffs++; // skip the 0 coefficient
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
b1 = *pCoeffs++;
|
*/
|
||||||
b2 = *pCoeffs++;
|
|
||||||
a1 = *pCoeffs++;
|
sample = blockSize;
|
||||||
a2 = *pCoeffs++;
|
|
||||||
|
while (sample > 0U)
|
||||||
/* Reading the state values */
|
{
|
||||||
Xn1 = pState[0];
|
/* Read the input */
|
||||||
Xn2 = pState[1];
|
Xn = *pIn++;
|
||||||
Yn1 = pState[2];
|
|
||||||
Yn2 = pState[3];
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
|
/* acc = b0 * x[n] */
|
||||||
/* The variable acc holds the output value that is computed:
|
acc = (q31_t) b0 *Xn;
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
|
||||||
*/
|
/* acc += b1 * x[n-1] */
|
||||||
|
acc += (q31_t) b1 *Xn1;
|
||||||
sample = blockSize;
|
/* acc += b2 * x[n-2] */
|
||||||
|
acc += (q31_t) b2 *Xn2;
|
||||||
while(sample > 0u)
|
/* acc += a1 * y[n-1] */
|
||||||
{
|
acc += (q31_t) a1 *Yn1;
|
||||||
/* Read the input */
|
/* acc += a2 * y[n-2] */
|
||||||
Xn = *pIn++;
|
acc += (q31_t) a2 *Yn2;
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* The result is converted to 1.15 */
|
||||||
/* acc = b0 * x[n] */
|
acc = __SSAT((acc >> shift), 16);
|
||||||
acc = (q31_t) b0 *Xn;
|
|
||||||
|
/* Every time after the output is computed state should be updated. */
|
||||||
/* acc += b1 * x[n-1] */
|
/* The states should be updated as: */
|
||||||
acc += (q31_t) b1 *Xn1;
|
/* Xn2 = Xn1 */
|
||||||
/* acc += b2 * x[n-2] */
|
/* Xn1 = Xn */
|
||||||
acc += (q31_t) b2 *Xn2;
|
/* Yn2 = Yn1 */
|
||||||
/* acc += a1 * y[n-1] */
|
/* Yn1 = acc */
|
||||||
acc += (q31_t) a1 *Yn1;
|
Xn2 = Xn1;
|
||||||
/* acc += a2 * y[n-2] */
|
Xn1 = Xn;
|
||||||
acc += (q31_t) a2 *Yn2;
|
Yn2 = Yn1;
|
||||||
|
Yn1 = (q15_t) acc;
|
||||||
/* The result is converted to 1.15 */
|
|
||||||
acc = __SSAT((acc >> shift), 16);
|
/* Store the output in the destination buffer. */
|
||||||
|
*pOut++ = (q15_t) acc;
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
/* decrement the loop counter */
|
||||||
/* Xn2 = Xn1 */
|
sample--;
|
||||||
/* Xn1 = Xn */
|
}
|
||||||
/* Yn2 = Yn1 */
|
|
||||||
/* Yn1 = acc */
|
/* The first stage goes from the input buffer to the output buffer. */
|
||||||
Xn2 = Xn1;
|
/* Subsequent stages occur in-place in the output buffer */
|
||||||
Xn1 = Xn;
|
pIn = pDst;
|
||||||
Yn2 = Yn1;
|
|
||||||
Yn1 = (q15_t) acc;
|
/* Reset to destination pointer */
|
||||||
|
pOut = pDst;
|
||||||
/* Store the output in the destination buffer. */
|
|
||||||
*pOut++ = (q15_t) acc;
|
/* Store the updated state variables back into the pState array */
|
||||||
|
*pState++ = Xn1;
|
||||||
/* decrement the loop counter */
|
*pState++ = Xn2;
|
||||||
sample--;
|
*pState++ = Yn1;
|
||||||
}
|
*pState++ = Yn2;
|
||||||
|
|
||||||
/* The first stage goes from the input buffer to the output buffer. */
|
} while (--stage);
|
||||||
/* Subsequent stages occur in-place in the output buffer */
|
|
||||||
pIn = pDst;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
|
||||||
/* Reset to destination pointer */
|
}
|
||||||
pOut = pDst;
|
|
||||||
|
|
||||||
/* Store the updated state variables back into the pState array */
|
/**
|
||||||
*pState++ = Xn1;
|
* @} end of BiquadCascadeDF1 group
|
||||||
*pState++ = Xn2;
|
*/
|
||||||
*pState++ = Yn1;
|
|
||||||
*pState++ = Yn2;
|
|
||||||
|
|
||||||
} while(--stage);
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY */
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF1 group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,405 +1,392 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df1_q31.c
|
||||||
* $Date: 12. March 2014
|
* Description: Processing function for the Q31 Biquad cascade filter
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df1_q31.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Processing function for the
|
* -------------------------------------------------------------------- */
|
||||||
* Q31 Biquad cascade filter
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @ingroup groupFilters
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
*/
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @addtogroup BiquadCascadeDF1
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @{
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*/
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* -------------------------------------------------------------------- */
|
/**
|
||||||
|
* @brief Processing function for the Q31 Biquad cascade filter.
|
||||||
#include "arm_math.h"
|
* @param[in] *S points to an instance of the Q31 Biquad cascade structure.
|
||||||
|
* @param[in] *pSrc points to the block of input data.
|
||||||
/**
|
* @param[out] *pDst points to the block of output data.
|
||||||
* @ingroup groupFilters
|
* @param[in] blockSize number of samples to process per call.
|
||||||
*/
|
* @return none.
|
||||||
|
*
|
||||||
/**
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @addtogroup BiquadCascadeDF1
|
* \par
|
||||||
* @{
|
* The function is implemented using an internal 64-bit accumulator.
|
||||||
*/
|
* The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
|
||||||
|
* Thus, if the accumulator result overflows it wraps around rather than clipping.
|
||||||
/**
|
* In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
|
||||||
* @brief Processing function for the Q31 Biquad cascade filter.
|
* After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
|
||||||
* @param[in] *S points to an instance of the Q31 Biquad cascade structure.
|
* 1.31 format by discarding the low 32 bits.
|
||||||
* @param[in] *pSrc points to the block of input data.
|
*
|
||||||
* @param[out] *pDst points to the block of output data.
|
* \par
|
||||||
* @param[in] blockSize number of samples to process per call.
|
* Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
|
||||||
* @return none.
|
*/
|
||||||
*
|
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
void arm_biquad_cascade_df1_q31(
|
||||||
* \par
|
const arm_biquad_casd_df1_inst_q31 * S,
|
||||||
* The function is implemented using an internal 64-bit accumulator.
|
q31_t * pSrc,
|
||||||
* The accumulator has a 2.62 format and maintains full precision of the intermediate multiplication results but provides only a single guard bit.
|
q31_t * pDst,
|
||||||
* Thus, if the accumulator result overflows it wraps around rather than clipping.
|
uint32_t blockSize)
|
||||||
* In order to avoid overflows completely the input signal must be scaled down by 2 bits and lie in the range [-0.25 +0.25).
|
{
|
||||||
* After all 5 multiply-accumulates are performed, the 2.62 accumulator is shifted by <code>postShift</code> bits and the result truncated to
|
q63_t acc; /* accumulator */
|
||||||
* 1.31 format by discarding the low 32 bits.
|
uint32_t uShift = ((uint32_t) S->postShift + 1U);
|
||||||
*
|
uint32_t lShift = 32U - uShift; /* Shift to be applied to the output */
|
||||||
* \par
|
q31_t *pIn = pSrc; /* input pointer initialization */
|
||||||
* Refer to the function <code>arm_biquad_cascade_df1_fast_q31()</code> for a faster but less precise implementation of this filter for Cortex-M3 and Cortex-M4.
|
q31_t *pOut = pDst; /* output pointer initialization */
|
||||||
*/
|
q31_t *pState = S->pState; /* pState pointer initialization */
|
||||||
|
q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
|
||||||
void arm_biquad_cascade_df1_q31(
|
q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
|
||||||
const arm_biquad_casd_df1_inst_q31 * S,
|
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
||||||
q31_t * pSrc,
|
q31_t Xn; /* temporary input */
|
||||||
q31_t * pDst,
|
uint32_t sample, stage = S->numStages; /* loop counters */
|
||||||
uint32_t blockSize)
|
|
||||||
{
|
|
||||||
q63_t acc; /* accumulator */
|
#if defined (ARM_MATH_DSP)
|
||||||
uint32_t uShift = ((uint32_t) S->postShift + 1u);
|
|
||||||
uint32_t lShift = 32u - uShift; /* Shift to be applied to the output */
|
q31_t acc_l, acc_h; /* temporary output variables */
|
||||||
q31_t *pIn = pSrc; /* input pointer initialization */
|
|
||||||
q31_t *pOut = pDst; /* output pointer initialization */
|
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
||||||
q31_t *pState = S->pState; /* pState pointer initialization */
|
|
||||||
q31_t *pCoeffs = S->pCoeffs; /* coeff pointer initialization */
|
do
|
||||||
q31_t Xn1, Xn2, Yn1, Yn2; /* Filter state variables */
|
{
|
||||||
q31_t b0, b1, b2, a1, a2; /* Filter coefficients */
|
/* Reading the coefficients */
|
||||||
q31_t Xn; /* temporary input */
|
b0 = *pCoeffs++;
|
||||||
uint32_t sample, stage = S->numStages; /* loop counters */
|
b1 = *pCoeffs++;
|
||||||
|
b2 = *pCoeffs++;
|
||||||
|
a1 = *pCoeffs++;
|
||||||
#ifndef ARM_MATH_CM0_FAMILY_FAMILY
|
a2 = *pCoeffs++;
|
||||||
|
|
||||||
q31_t acc_l, acc_h; /* temporary output variables */
|
/* Reading the state values */
|
||||||
|
Xn1 = pState[0];
|
||||||
/* Run the below code for Cortex-M4 and Cortex-M3 */
|
Xn2 = pState[1];
|
||||||
|
Yn1 = pState[2];
|
||||||
do
|
Yn2 = pState[3];
|
||||||
{
|
|
||||||
/* Reading the coefficients */
|
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
||||||
b0 = *pCoeffs++;
|
/* The variable acc holds the output values that are being computed:
|
||||||
b1 = *pCoeffs++;
|
*
|
||||||
b2 = *pCoeffs++;
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
a1 = *pCoeffs++;
|
*/
|
||||||
a2 = *pCoeffs++;
|
|
||||||
|
sample = blockSize >> 2U;
|
||||||
/* Reading the state values */
|
|
||||||
Xn1 = pState[0];
|
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
||||||
Xn2 = pState[1];
|
** a second loop below computes the remaining 1 to 3 samples. */
|
||||||
Yn1 = pState[2];
|
while (sample > 0U)
|
||||||
Yn2 = pState[3];
|
{
|
||||||
|
/* Read the input */
|
||||||
/* Apply loop unrolling and compute 4 output values simultaneously. */
|
Xn = *pIn++;
|
||||||
/* The variable acc holds the output values that are being computed:
|
|
||||||
*
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
|
||||||
*/
|
/* acc = b0 * x[n] */
|
||||||
|
acc = (q63_t) b0 *Xn;
|
||||||
sample = blockSize >> 2u;
|
/* acc += b1 * x[n-1] */
|
||||||
|
acc += (q63_t) b1 *Xn1;
|
||||||
/* First part of the processing with loop unrolling. Compute 4 outputs at a time.
|
/* acc += b[2] * x[n-2] */
|
||||||
** a second loop below computes the remaining 1 to 3 samples. */
|
acc += (q63_t) b2 *Xn2;
|
||||||
while(sample > 0u)
|
/* acc += a1 * y[n-1] */
|
||||||
{
|
acc += (q63_t) a1 *Yn1;
|
||||||
/* Read the input */
|
/* acc += a2 * y[n-2] */
|
||||||
Xn = *pIn++;
|
acc += (q63_t) a2 *Yn2;
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* The result is converted to 1.31, Yn2 variable is reused */
|
||||||
|
|
||||||
/* acc = b0 * x[n] */
|
/* Calc lower part of acc */
|
||||||
acc = (q63_t) b0 *Xn;
|
acc_l = acc & 0xffffffff;
|
||||||
/* acc += b1 * x[n-1] */
|
|
||||||
acc += (q63_t) b1 *Xn1;
|
/* Calc upper part of acc */
|
||||||
/* acc += b[2] * x[n-2] */
|
acc_h = (acc >> 32) & 0xffffffff;
|
||||||
acc += (q63_t) b2 *Xn2;
|
|
||||||
/* acc += a1 * y[n-1] */
|
/* Apply shift for lower part of acc and upper part of acc */
|
||||||
acc += (q63_t) a1 *Yn1;
|
Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||||
/* acc += a2 * y[n-2] */
|
|
||||||
acc += (q63_t) a2 *Yn2;
|
/* Store the output in the destination buffer. */
|
||||||
|
*pOut++ = Yn2;
|
||||||
/* The result is converted to 1.31, Yn2 variable is reused */
|
|
||||||
|
/* Read the second input */
|
||||||
/* Calc lower part of acc */
|
Xn2 = *pIn++;
|
||||||
acc_l = acc & 0xffffffff;
|
|
||||||
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
/* Calc upper part of acc */
|
|
||||||
acc_h = (acc >> 32) & 0xffffffff;
|
/* acc = b0 * x[n] */
|
||||||
|
acc = (q63_t) b0 *Xn2;
|
||||||
/* Apply shift for lower part of acc and upper part of acc */
|
/* acc += b1 * x[n-1] */
|
||||||
Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
acc += (q63_t) b1 *Xn;
|
||||||
|
/* acc += b[2] * x[n-2] */
|
||||||
/* Store the output in the destination buffer. */
|
acc += (q63_t) b2 *Xn1;
|
||||||
*pOut++ = Yn2;
|
/* acc += a1 * y[n-1] */
|
||||||
|
acc += (q63_t) a1 *Yn2;
|
||||||
/* Read the second input */
|
/* acc += a2 * y[n-2] */
|
||||||
Xn2 = *pIn++;
|
acc += (q63_t) a2 *Yn1;
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
|
||||||
|
/* The result is converted to 1.31, Yn1 variable is reused */
|
||||||
/* acc = b0 * x[n] */
|
|
||||||
acc = (q63_t) b0 *Xn2;
|
/* Calc lower part of acc */
|
||||||
/* acc += b1 * x[n-1] */
|
acc_l = acc & 0xffffffff;
|
||||||
acc += (q63_t) b1 *Xn;
|
|
||||||
/* acc += b[2] * x[n-2] */
|
/* Calc upper part of acc */
|
||||||
acc += (q63_t) b2 *Xn1;
|
acc_h = (acc >> 32) & 0xffffffff;
|
||||||
/* acc += a1 * y[n-1] */
|
|
||||||
acc += (q63_t) a1 *Yn2;
|
|
||||||
/* acc += a2 * y[n-2] */
|
/* Apply shift for lower part of acc and upper part of acc */
|
||||||
acc += (q63_t) a2 *Yn1;
|
Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||||
|
|
||||||
|
/* Store the output in the destination buffer. */
|
||||||
/* The result is converted to 1.31, Yn1 variable is reused */
|
*pOut++ = Yn1;
|
||||||
|
|
||||||
/* Calc lower part of acc */
|
/* Read the third input */
|
||||||
acc_l = acc & 0xffffffff;
|
Xn1 = *pIn++;
|
||||||
|
|
||||||
/* Calc upper part of acc */
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
acc_h = (acc >> 32) & 0xffffffff;
|
|
||||||
|
/* acc = b0 * x[n] */
|
||||||
|
acc = (q63_t) b0 *Xn1;
|
||||||
/* Apply shift for lower part of acc and upper part of acc */
|
/* acc += b1 * x[n-1] */
|
||||||
Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
acc += (q63_t) b1 *Xn2;
|
||||||
|
/* acc += b[2] * x[n-2] */
|
||||||
/* Store the output in the destination buffer. */
|
acc += (q63_t) b2 *Xn;
|
||||||
*pOut++ = Yn1;
|
/* acc += a1 * y[n-1] */
|
||||||
|
acc += (q63_t) a1 *Yn1;
|
||||||
/* Read the third input */
|
/* acc += a2 * y[n-2] */
|
||||||
Xn1 = *pIn++;
|
acc += (q63_t) a2 *Yn2;
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* The result is converted to 1.31, Yn2 variable is reused */
|
||||||
|
/* Calc lower part of acc */
|
||||||
/* acc = b0 * x[n] */
|
acc_l = acc & 0xffffffff;
|
||||||
acc = (q63_t) b0 *Xn1;
|
|
||||||
/* acc += b1 * x[n-1] */
|
/* Calc upper part of acc */
|
||||||
acc += (q63_t) b1 *Xn2;
|
acc_h = (acc >> 32) & 0xffffffff;
|
||||||
/* acc += b[2] * x[n-2] */
|
|
||||||
acc += (q63_t) b2 *Xn;
|
|
||||||
/* acc += a1 * y[n-1] */
|
/* Apply shift for lower part of acc and upper part of acc */
|
||||||
acc += (q63_t) a1 *Yn1;
|
Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||||
/* acc += a2 * y[n-2] */
|
|
||||||
acc += (q63_t) a2 *Yn2;
|
/* Store the output in the destination buffer. */
|
||||||
|
*pOut++ = Yn2;
|
||||||
/* The result is converted to 1.31, Yn2 variable is reused */
|
|
||||||
/* Calc lower part of acc */
|
/* Read the fourth input */
|
||||||
acc_l = acc & 0xffffffff;
|
Xn = *pIn++;
|
||||||
|
|
||||||
/* Calc upper part of acc */
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
acc_h = (acc >> 32) & 0xffffffff;
|
|
||||||
|
/* acc = b0 * x[n] */
|
||||||
|
acc = (q63_t) b0 *Xn;
|
||||||
/* Apply shift for lower part of acc and upper part of acc */
|
/* acc += b1 * x[n-1] */
|
||||||
Yn2 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
acc += (q63_t) b1 *Xn1;
|
||||||
|
/* acc += b[2] * x[n-2] */
|
||||||
/* Store the output in the destination buffer. */
|
acc += (q63_t) b2 *Xn2;
|
||||||
*pOut++ = Yn2;
|
/* acc += a1 * y[n-1] */
|
||||||
|
acc += (q63_t) a1 *Yn2;
|
||||||
/* Read the fourth input */
|
/* acc += a2 * y[n-2] */
|
||||||
Xn = *pIn++;
|
acc += (q63_t) a2 *Yn1;
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* The result is converted to 1.31, Yn1 variable is reused */
|
||||||
|
/* Calc lower part of acc */
|
||||||
/* acc = b0 * x[n] */
|
acc_l = acc & 0xffffffff;
|
||||||
acc = (q63_t) b0 *Xn;
|
|
||||||
/* acc += b1 * x[n-1] */
|
/* Calc upper part of acc */
|
||||||
acc += (q63_t) b1 *Xn1;
|
acc_h = (acc >> 32) & 0xffffffff;
|
||||||
/* acc += b[2] * x[n-2] */
|
|
||||||
acc += (q63_t) b2 *Xn2;
|
/* Apply shift for lower part of acc and upper part of acc */
|
||||||
/* acc += a1 * y[n-1] */
|
Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
||||||
acc += (q63_t) a1 *Yn2;
|
|
||||||
/* acc += a2 * y[n-2] */
|
/* Every time after the output is computed state should be updated. */
|
||||||
acc += (q63_t) a2 *Yn1;
|
/* The states should be updated as: */
|
||||||
|
/* Xn2 = Xn1 */
|
||||||
/* The result is converted to 1.31, Yn1 variable is reused */
|
/* Xn1 = Xn */
|
||||||
/* Calc lower part of acc */
|
/* Yn2 = Yn1 */
|
||||||
acc_l = acc & 0xffffffff;
|
/* Yn1 = acc */
|
||||||
|
Xn2 = Xn1;
|
||||||
/* Calc upper part of acc */
|
Xn1 = Xn;
|
||||||
acc_h = (acc >> 32) & 0xffffffff;
|
|
||||||
|
/* Store the output in the destination buffer. */
|
||||||
/* Apply shift for lower part of acc and upper part of acc */
|
*pOut++ = Yn1;
|
||||||
Yn1 = (uint32_t) acc_l >> lShift | acc_h << uShift;
|
|
||||||
|
/* decrement the loop counter */
|
||||||
/* Every time after the output is computed state should be updated. */
|
sample--;
|
||||||
/* The states should be updated as: */
|
}
|
||||||
/* Xn2 = Xn1 */
|
|
||||||
/* Xn1 = Xn */
|
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
||||||
/* Yn2 = Yn1 */
|
** No loop unrolling is used. */
|
||||||
/* Yn1 = acc */
|
sample = (blockSize & 0x3U);
|
||||||
Xn2 = Xn1;
|
|
||||||
Xn1 = Xn;
|
while (sample > 0U)
|
||||||
|
{
|
||||||
/* Store the output in the destination buffer. */
|
/* Read the input */
|
||||||
*pOut++ = Yn1;
|
Xn = *pIn++;
|
||||||
|
|
||||||
/* decrement the loop counter */
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
sample--;
|
|
||||||
}
|
/* acc = b0 * x[n] */
|
||||||
|
acc = (q63_t) b0 *Xn;
|
||||||
/* If the blockSize is not a multiple of 4, compute any remaining output samples here.
|
/* acc += b1 * x[n-1] */
|
||||||
** No loop unrolling is used. */
|
acc += (q63_t) b1 *Xn1;
|
||||||
sample = (blockSize & 0x3u);
|
/* acc += b[2] * x[n-2] */
|
||||||
|
acc += (q63_t) b2 *Xn2;
|
||||||
while(sample > 0u)
|
/* acc += a1 * y[n-1] */
|
||||||
{
|
acc += (q63_t) a1 *Yn1;
|
||||||
/* Read the input */
|
/* acc += a2 * y[n-2] */
|
||||||
Xn = *pIn++;
|
acc += (q63_t) a2 *Yn2;
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* The result is converted to 1.31 */
|
||||||
|
acc = acc >> lShift;
|
||||||
/* acc = b0 * x[n] */
|
|
||||||
acc = (q63_t) b0 *Xn;
|
/* Every time after the output is computed state should be updated. */
|
||||||
/* acc += b1 * x[n-1] */
|
/* The states should be updated as: */
|
||||||
acc += (q63_t) b1 *Xn1;
|
/* Xn2 = Xn1 */
|
||||||
/* acc += b[2] * x[n-2] */
|
/* Xn1 = Xn */
|
||||||
acc += (q63_t) b2 *Xn2;
|
/* Yn2 = Yn1 */
|
||||||
/* acc += a1 * y[n-1] */
|
/* Yn1 = acc */
|
||||||
acc += (q63_t) a1 *Yn1;
|
Xn2 = Xn1;
|
||||||
/* acc += a2 * y[n-2] */
|
Xn1 = Xn;
|
||||||
acc += (q63_t) a2 *Yn2;
|
Yn2 = Yn1;
|
||||||
|
Yn1 = (q31_t) acc;
|
||||||
/* The result is converted to 1.31 */
|
|
||||||
acc = acc >> lShift;
|
/* Store the output in the destination buffer. */
|
||||||
|
*pOut++ = (q31_t) acc;
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
/* decrement the loop counter */
|
||||||
/* Xn2 = Xn1 */
|
sample--;
|
||||||
/* Xn1 = Xn */
|
}
|
||||||
/* Yn2 = Yn1 */
|
|
||||||
/* Yn1 = acc */
|
/* The first stage goes from the input buffer to the output buffer. */
|
||||||
Xn2 = Xn1;
|
/* Subsequent stages occur in-place in the output buffer */
|
||||||
Xn1 = Xn;
|
pIn = pDst;
|
||||||
Yn2 = Yn1;
|
|
||||||
Yn1 = (q31_t) acc;
|
/* Reset to destination pointer */
|
||||||
|
pOut = pDst;
|
||||||
/* Store the output in the destination buffer. */
|
|
||||||
*pOut++ = (q31_t) acc;
|
/* Store the updated state variables back into the pState array */
|
||||||
|
*pState++ = Xn1;
|
||||||
/* decrement the loop counter */
|
*pState++ = Xn2;
|
||||||
sample--;
|
*pState++ = Yn1;
|
||||||
}
|
*pState++ = Yn2;
|
||||||
|
|
||||||
/* The first stage goes from the input buffer to the output buffer. */
|
} while (--stage);
|
||||||
/* Subsequent stages occur in-place in the output buffer */
|
|
||||||
pIn = pDst;
|
#else
|
||||||
|
|
||||||
/* Reset to destination pointer */
|
/* Run the below code for Cortex-M0 */
|
||||||
pOut = pDst;
|
|
||||||
|
do
|
||||||
/* Store the updated state variables back into the pState array */
|
{
|
||||||
*pState++ = Xn1;
|
/* Reading the coefficients */
|
||||||
*pState++ = Xn2;
|
b0 = *pCoeffs++;
|
||||||
*pState++ = Yn1;
|
b1 = *pCoeffs++;
|
||||||
*pState++ = Yn2;
|
b2 = *pCoeffs++;
|
||||||
|
a1 = *pCoeffs++;
|
||||||
} while(--stage);
|
a2 = *pCoeffs++;
|
||||||
|
|
||||||
#else
|
/* Reading the state values */
|
||||||
|
Xn1 = pState[0];
|
||||||
/* Run the below code for Cortex-M0 */
|
Xn2 = pState[1];
|
||||||
|
Yn1 = pState[2];
|
||||||
do
|
Yn2 = pState[3];
|
||||||
{
|
|
||||||
/* Reading the coefficients */
|
/* The variable acc holds the output value that is computed:
|
||||||
b0 = *pCoeffs++;
|
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
||||||
b1 = *pCoeffs++;
|
*/
|
||||||
b2 = *pCoeffs++;
|
|
||||||
a1 = *pCoeffs++;
|
sample = blockSize;
|
||||||
a2 = *pCoeffs++;
|
|
||||||
|
while (sample > 0U)
|
||||||
/* Reading the state values */
|
{
|
||||||
Xn1 = pState[0];
|
/* Read the input */
|
||||||
Xn2 = pState[1];
|
Xn = *pIn++;
|
||||||
Yn1 = pState[2];
|
|
||||||
Yn2 = pState[3];
|
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
||||||
|
/* acc = b0 * x[n] */
|
||||||
/* The variable acc holds the output value that is computed:
|
acc = (q63_t) b0 *Xn;
|
||||||
* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2]
|
|
||||||
*/
|
/* acc += b1 * x[n-1] */
|
||||||
|
acc += (q63_t) b1 *Xn1;
|
||||||
sample = blockSize;
|
/* acc += b[2] * x[n-2] */
|
||||||
|
acc += (q63_t) b2 *Xn2;
|
||||||
while(sample > 0u)
|
/* acc += a1 * y[n-1] */
|
||||||
{
|
acc += (q63_t) a1 *Yn1;
|
||||||
/* Read the input */
|
/* acc += a2 * y[n-2] */
|
||||||
Xn = *pIn++;
|
acc += (q63_t) a2 *Yn2;
|
||||||
|
|
||||||
/* acc = b0 * x[n] + b1 * x[n-1] + b2 * x[n-2] + a1 * y[n-1] + a2 * y[n-2] */
|
/* The result is converted to 1.31 */
|
||||||
/* acc = b0 * x[n] */
|
acc = acc >> lShift;
|
||||||
acc = (q63_t) b0 *Xn;
|
|
||||||
|
/* Every time after the output is computed state should be updated. */
|
||||||
/* acc += b1 * x[n-1] */
|
/* The states should be updated as: */
|
||||||
acc += (q63_t) b1 *Xn1;
|
/* Xn2 = Xn1 */
|
||||||
/* acc += b[2] * x[n-2] */
|
/* Xn1 = Xn */
|
||||||
acc += (q63_t) b2 *Xn2;
|
/* Yn2 = Yn1 */
|
||||||
/* acc += a1 * y[n-1] */
|
/* Yn1 = acc */
|
||||||
acc += (q63_t) a1 *Yn1;
|
Xn2 = Xn1;
|
||||||
/* acc += a2 * y[n-2] */
|
Xn1 = Xn;
|
||||||
acc += (q63_t) a2 *Yn2;
|
Yn2 = Yn1;
|
||||||
|
Yn1 = (q31_t) acc;
|
||||||
/* The result is converted to 1.31 */
|
|
||||||
acc = acc >> lShift;
|
/* Store the output in the destination buffer. */
|
||||||
|
*pOut++ = (q31_t) acc;
|
||||||
/* Every time after the output is computed state should be updated. */
|
|
||||||
/* The states should be updated as: */
|
/* decrement the loop counter */
|
||||||
/* Xn2 = Xn1 */
|
sample--;
|
||||||
/* Xn1 = Xn */
|
}
|
||||||
/* Yn2 = Yn1 */
|
|
||||||
/* Yn1 = acc */
|
/* The first stage goes from the input buffer to the output buffer. */
|
||||||
Xn2 = Xn1;
|
/* Subsequent stages occur in-place in the output buffer */
|
||||||
Xn1 = Xn;
|
pIn = pDst;
|
||||||
Yn2 = Yn1;
|
|
||||||
Yn1 = (q31_t) acc;
|
/* Reset to destination pointer */
|
||||||
|
pOut = pDst;
|
||||||
/* Store the output in the destination buffer. */
|
|
||||||
*pOut++ = (q31_t) acc;
|
/* Store the updated state variables back into the pState array */
|
||||||
|
*pState++ = Xn1;
|
||||||
/* decrement the loop counter */
|
*pState++ = Xn2;
|
||||||
sample--;
|
*pState++ = Yn1;
|
||||||
}
|
*pState++ = Yn2;
|
||||||
|
|
||||||
/* The first stage goes from the input buffer to the output buffer. */
|
} while (--stage);
|
||||||
/* Subsequent stages occur in-place in the output buffer */
|
|
||||||
pIn = pDst;
|
#endif /* #if defined (ARM_MATH_DSP) */
|
||||||
|
}
|
||||||
/* Reset to destination pointer */
|
|
||||||
pOut = pDst;
|
|
||||||
|
|
||||||
/* Store the updated state variables back into the pState array */
|
|
||||||
*pState++ = Xn1;
|
/**
|
||||||
*pState++ = Xn2;
|
* @} end of BiquadCascadeDF1 group
|
||||||
*pState++ = Yn1;
|
*/
|
||||||
*pState++ = Yn2;
|
|
||||||
|
|
||||||
} while(--stage);
|
|
||||||
|
|
||||||
#endif /* #ifndef ARM_MATH_CM0_FAMILY_FAMILY */
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF1 group
|
|
||||||
*/
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,102 +1,89 @@
|
||||||
/*-----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df2T_init_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Initialization function for floating-point transposed direct form II Biquad cascade filter
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df2T_init_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Initialization function for the floating-point transposed
|
* -------------------------------------------------------------------- */
|
||||||
* direct form II Biquad cascade filter.
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @ingroup groupFilters
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
*/
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @addtogroup BiquadCascadeDF2T
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @{
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*/
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* ---------------------------------------------------------------------------*/
|
/**
|
||||||
|
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
|
||||||
#include "arm_math.h"
|
* @param[in,out] *S points to an instance of the filter data structure.
|
||||||
|
* @param[in] numStages number of 2nd order stages in the filter.
|
||||||
/**
|
* @param[in] *pCoeffs points to the filter coefficients.
|
||||||
* @ingroup groupFilters
|
* @param[in] *pState points to the state buffer.
|
||||||
*/
|
* @return none
|
||||||
|
*
|
||||||
/**
|
* <b>Coefficient and State Ordering:</b>
|
||||||
* @addtogroup BiquadCascadeDF2T
|
* \par
|
||||||
* @{
|
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||||
*/
|
* <pre>
|
||||||
|
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||||
/**
|
* </pre>
|
||||||
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
|
*
|
||||||
* @param[in,out] *S points to an instance of the filter data structure.
|
* \par
|
||||||
* @param[in] numStages number of 2nd order stages in the filter.
|
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||||
* @param[in] *pCoeffs points to the filter coefficients.
|
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||||
* @param[in] *pState points to the state buffer.
|
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||||
* @return none
|
*
|
||||||
*
|
* \par
|
||||||
* <b>Coefficient and State Ordering:</b>
|
* The <code>pState</code> is a pointer to state array.
|
||||||
* \par
|
* Each Biquad stage has 2 state variables, <code>d1</code> and <code>d2</code>.
|
||||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
* The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
|
||||||
* <pre>
|
* The state array has a total length of <code>2*numStages</code> values.
|
||||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||||
* </pre>
|
*/
|
||||||
*
|
|
||||||
* \par
|
void arm_biquad_cascade_df2T_init_f32(
|
||||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
arm_biquad_cascade_df2T_instance_f32 * S,
|
||||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
uint8_t numStages,
|
||||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
float32_t * pCoeffs,
|
||||||
*
|
float32_t * pState)
|
||||||
* \par
|
{
|
||||||
* The <code>pState</code> is a pointer to state array.
|
/* Assign filter stages */
|
||||||
* Each Biquad stage has 2 state variables, <code>d1</code> and <code>d2</code>.
|
S->numStages = numStages;
|
||||||
* The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
|
|
||||||
* The state array has a total length of <code>2*numStages</code> values.
|
/* Assign coefficient pointer */
|
||||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
S->pCoeffs = pCoeffs;
|
||||||
*/
|
|
||||||
|
/* Clear state buffer and size is always 2 * numStages */
|
||||||
void arm_biquad_cascade_df2T_init_f32(
|
memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float32_t));
|
||||||
arm_biquad_cascade_df2T_instance_f32 * S,
|
|
||||||
uint8_t numStages,
|
/* Assign state pointer */
|
||||||
float32_t * pCoeffs,
|
S->pState = pState;
|
||||||
float32_t * pState)
|
}
|
||||||
{
|
|
||||||
/* Assign filter stages */
|
/**
|
||||||
S->numStages = numStages;
|
* @} end of BiquadCascadeDF2T group
|
||||||
|
*/
|
||||||
/* Assign coefficient pointer */
|
|
||||||
S->pCoeffs = pCoeffs;
|
|
||||||
|
|
||||||
/* Clear state buffer and size is always 2 * numStages */
|
|
||||||
memset(pState, 0, (2u * (uint32_t) numStages) * sizeof(float32_t));
|
|
||||||
|
|
||||||
/* Assign state pointer */
|
|
||||||
S->pState = pState;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF2T group
|
|
||||||
*/
|
|
||||||
|
|
|
@ -1,102 +1,89 @@
|
||||||
/*-----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_df2T_init_f64.c
|
||||||
* $Date: 12. March 2014
|
* Description: Initialization function for floating-point transposed direct form II Biquad cascade filter
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_df2T_init_f64.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Initialization function for the floating-point transposed
|
* -------------------------------------------------------------------- */
|
||||||
* direct form II Biquad cascade filter.
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @ingroup groupFilters
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
*/
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @addtogroup BiquadCascadeDF2T
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @{
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*/
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* ---------------------------------------------------------------------------*/
|
/**
|
||||||
|
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
|
||||||
#include "arm_math.h"
|
* @param[in,out] *S points to an instance of the filter data structure.
|
||||||
|
* @param[in] numStages number of 2nd order stages in the filter.
|
||||||
/**
|
* @param[in] *pCoeffs points to the filter coefficients.
|
||||||
* @ingroup groupFilters
|
* @param[in] *pState points to the state buffer.
|
||||||
*/
|
* @return none
|
||||||
|
*
|
||||||
/**
|
* <b>Coefficient and State Ordering:</b>
|
||||||
* @addtogroup BiquadCascadeDF2T
|
* \par
|
||||||
* @{
|
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||||
*/
|
* <pre>
|
||||||
|
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||||
/**
|
* </pre>
|
||||||
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
|
*
|
||||||
* @param[in,out] *S points to an instance of the filter data structure.
|
* \par
|
||||||
* @param[in] numStages number of 2nd order stages in the filter.
|
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||||
* @param[in] *pCoeffs points to the filter coefficients.
|
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||||
* @param[in] *pState points to the state buffer.
|
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||||
* @return none
|
*
|
||||||
*
|
* \par
|
||||||
* <b>Coefficient and State Ordering:</b>
|
* The <code>pState</code> is a pointer to state array.
|
||||||
* \par
|
* Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
|
||||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
* The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
|
||||||
* <pre>
|
* The state array has a total length of <code>2*numStages</code> values.
|
||||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||||
* </pre>
|
*/
|
||||||
*
|
|
||||||
* \par
|
void arm_biquad_cascade_df2T_init_f64(
|
||||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
arm_biquad_cascade_df2T_instance_f64 * S,
|
||||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
uint8_t numStages,
|
||||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
float64_t * pCoeffs,
|
||||||
*
|
float64_t * pState)
|
||||||
* \par
|
{
|
||||||
* The <code>pState</code> is a pointer to state array.
|
/* Assign filter stages */
|
||||||
* Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code>.
|
S->numStages = numStages;
|
||||||
* The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
|
|
||||||
* The state array has a total length of <code>2*numStages</code> values.
|
/* Assign coefficient pointer */
|
||||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
S->pCoeffs = pCoeffs;
|
||||||
*/
|
|
||||||
|
/* Clear state buffer and size is always 2 * numStages */
|
||||||
void arm_biquad_cascade_df2T_init_f64(
|
memset(pState, 0, (2U * (uint32_t) numStages) * sizeof(float64_t));
|
||||||
arm_biquad_cascade_df2T_instance_f64 * S,
|
|
||||||
uint8_t numStages,
|
/* Assign state pointer */
|
||||||
float64_t * pCoeffs,
|
S->pState = pState;
|
||||||
float64_t * pState)
|
}
|
||||||
{
|
|
||||||
/* Assign filter stages */
|
/**
|
||||||
S->numStages = numStages;
|
* @} end of BiquadCascadeDF2T group
|
||||||
|
*/
|
||||||
/* Assign coefficient pointer */
|
|
||||||
S->pCoeffs = pCoeffs;
|
|
||||||
|
|
||||||
/* Clear state buffer and size is always 2 * numStages */
|
|
||||||
memset(pState, 0, (2u * (uint32_t) numStages) * sizeof(float64_t));
|
|
||||||
|
|
||||||
/* Assign state pointer */
|
|
||||||
S->pState = pState;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF2T group
|
|
||||||
*/
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,102 +1,89 @@
|
||||||
/*-----------------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_biquad_cascade_stereo_df2T_init_f32.c
|
||||||
* $Date: 12. March 2014
|
* Description: Initialization function for floating-point transposed direct form II Biquad cascade filter
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_biquad_cascade_stereo_df2T_init_f32.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Initialization function for the floating-point transposed
|
* -------------------------------------------------------------------- */
|
||||||
* direct form II Biquad cascade filter.
|
/*
|
||||||
*
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
*
|
||||||
*
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* Redistribution and use in source and binary forms, with or without
|
*
|
||||||
* modification, are permitted provided that the following conditions
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* are met:
|
* not use this file except in compliance with the License.
|
||||||
* - Redistributions of source code must retain the above copyright
|
* You may obtain a copy of the License at
|
||||||
* notice, this list of conditions and the following disclaimer.
|
*
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* notice, this list of conditions and the following disclaimer in
|
*
|
||||||
* the documentation and/or other materials provided with the
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* distribution.
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* may be used to endorse or promote products derived from this
|
* See the License for the specific language governing permissions and
|
||||||
* software without specific prior written permission.
|
* limitations under the License.
|
||||||
*
|
*/
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
#include "arm_math.h"
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
/**
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
* @ingroup groupFilters
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
*/
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
/**
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
* @addtogroup BiquadCascadeDF2T
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @{
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
*/
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
* ---------------------------------------------------------------------------*/
|
/**
|
||||||
|
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
|
||||||
#include "arm_math.h"
|
* @param[in,out] *S points to an instance of the filter data structure.
|
||||||
|
* @param[in] numStages number of 2nd order stages in the filter.
|
||||||
/**
|
* @param[in] *pCoeffs points to the filter coefficients.
|
||||||
* @ingroup groupFilters
|
* @param[in] *pState points to the state buffer.
|
||||||
*/
|
* @return none
|
||||||
|
*
|
||||||
/**
|
* <b>Coefficient and State Ordering:</b>
|
||||||
* @addtogroup BiquadCascadeDF2T
|
* \par
|
||||||
* @{
|
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
||||||
*/
|
* <pre>
|
||||||
|
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
||||||
/**
|
* </pre>
|
||||||
* @brief Initialization function for the floating-point transposed direct form II Biquad cascade filter.
|
*
|
||||||
* @param[in,out] *S points to an instance of the filter data structure.
|
* \par
|
||||||
* @param[in] numStages number of 2nd order stages in the filter.
|
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
||||||
* @param[in] *pCoeffs points to the filter coefficients.
|
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
||||||
* @param[in] *pState points to the state buffer.
|
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
||||||
* @return none
|
*
|
||||||
*
|
* \par
|
||||||
* <b>Coefficient and State Ordering:</b>
|
* The <code>pState</code> is a pointer to state array.
|
||||||
* \par
|
* Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code> for each channel.
|
||||||
* The coefficients are stored in the array <code>pCoeffs</code> in the following order:
|
* The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
|
||||||
* <pre>
|
* The state array has a total length of <code>2*numStages</code> values.
|
||||||
* {b10, b11, b12, a11, a12, b20, b21, b22, a21, a22, ...}
|
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
||||||
* </pre>
|
*/
|
||||||
*
|
|
||||||
* \par
|
void arm_biquad_cascade_stereo_df2T_init_f32(
|
||||||
* where <code>b1x</code> and <code>a1x</code> are the coefficients for the first stage,
|
arm_biquad_cascade_stereo_df2T_instance_f32 * S,
|
||||||
* <code>b2x</code> and <code>a2x</code> are the coefficients for the second stage,
|
uint8_t numStages,
|
||||||
* and so on. The <code>pCoeffs</code> array contains a total of <code>5*numStages</code> values.
|
float32_t * pCoeffs,
|
||||||
*
|
float32_t * pState)
|
||||||
* \par
|
{
|
||||||
* The <code>pState</code> is a pointer to state array.
|
/* Assign filter stages */
|
||||||
* Each Biquad stage has 2 state variables <code>d1,</code> and <code>d2</code> for each channel.
|
S->numStages = numStages;
|
||||||
* The 2 state variables for stage 1 are first, then the 2 state variables for stage 2, and so on.
|
|
||||||
* The state array has a total length of <code>2*numStages</code> values.
|
/* Assign coefficient pointer */
|
||||||
* The state variables are updated after each block of data is processed; the coefficients are untouched.
|
S->pCoeffs = pCoeffs;
|
||||||
*/
|
|
||||||
|
/* Clear state buffer and size is always 4 * numStages */
|
||||||
void arm_biquad_cascade_stereo_df2T_init_f32(
|
memset(pState, 0, (4U * (uint32_t) numStages) * sizeof(float32_t));
|
||||||
arm_biquad_cascade_stereo_df2T_instance_f32 * S,
|
|
||||||
uint8_t numStages,
|
/* Assign state pointer */
|
||||||
float32_t * pCoeffs,
|
S->pState = pState;
|
||||||
float32_t * pState)
|
}
|
||||||
{
|
|
||||||
/* Assign filter stages */
|
/**
|
||||||
S->numStages = numStages;
|
* @} end of BiquadCascadeDF2T group
|
||||||
|
*/
|
||||||
/* Assign coefficient pointer */
|
|
||||||
S->pCoeffs = pCoeffs;
|
|
||||||
|
|
||||||
/* Clear state buffer and size is always 4 * numStages */
|
|
||||||
memset(pState, 0, (4u * (uint32_t) numStages) * sizeof(float32_t));
|
|
||||||
|
|
||||||
/* Assign state pointer */
|
|
||||||
S->pState = pState;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of BiquadCascadeDF2T group
|
|
||||||
*/
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
@ -1,435 +1,423 @@
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
* Copyright (C) 2010-2014 ARM Limited. All rights reserved.
|
* Project: CMSIS DSP Library
|
||||||
*
|
* Title: arm_conv_opt_q7.c
|
||||||
* $Date: 12. March 2014
|
* Description: Convolution of Q7 sequences
|
||||||
* $Revision: V1.4.4
|
*
|
||||||
*
|
* $Date: 27. January 2017
|
||||||
* Project: CMSIS DSP Library
|
* $Revision: V.1.5.1
|
||||||
* Title: arm_conv_opt_q7.c
|
*
|
||||||
*
|
* Target Processor: Cortex-M cores
|
||||||
* Description: Convolution of Q7 sequences.
|
* -------------------------------------------------------------------- */
|
||||||
*
|
/*
|
||||||
* Target Processor: Cortex-M4/Cortex-M3/Cortex-M0
|
* Copyright (C) 2010-2017 ARM Limited or its affiliates. All rights reserved.
|
||||||
*
|
*
|
||||||
* Redistribution and use in source and binary forms, with or without
|
* SPDX-License-Identifier: Apache-2.0
|
||||||
* modification, are permitted provided that the following conditions
|
*
|
||||||
* are met:
|
* Licensed under the Apache License, Version 2.0 (the License); you may
|
||||||
* - Redistributions of source code must retain the above copyright
|
* not use this file except in compliance with the License.
|
||||||
* notice, this list of conditions and the following disclaimer.
|
* You may obtain a copy of the License at
|
||||||
* - Redistributions in binary form must reproduce the above copyright
|
*
|
||||||
* notice, this list of conditions and the following disclaimer in
|
* www.apache.org/licenses/LICENSE-2.0
|
||||||
* the documentation and/or other materials provided with the
|
*
|
||||||
* distribution.
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
* - Neither the name of ARM LIMITED nor the names of its contributors
|
* distributed under the License is distributed on an AS IS BASIS, WITHOUT
|
||||||
* may be used to endorse or promote products derived from this
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
* software without specific prior written permission.
|
* See the License for the specific language governing permissions and
|
||||||
*
|
* limitations under the License.
|
||||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
*/
|
||||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
|
||||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
#include "arm_math.h"
|
||||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
|
|
||||||
* COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
/**
|
||||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
* @ingroup groupFilters
|
||||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
*/
|
||||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
|
||||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
/**
|
||||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
|
* @addtogroup Conv
|
||||||
* ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
* @{
|
||||||
* POSSIBILITY OF SUCH DAMAGE.
|
*/
|
||||||
* -------------------------------------------------------------------- */
|
|
||||||
|
/**
|
||||||
#include "arm_math.h"
|
* @brief Convolution of Q7 sequences.
|
||||||
|
* @param[in] *pSrcA points to the first input sequence.
|
||||||
/**
|
* @param[in] srcALen length of the first input sequence.
|
||||||
* @ingroup groupFilters
|
* @param[in] *pSrcB points to the second input sequence.
|
||||||
*/
|
* @param[in] srcBLen length of the second input sequence.
|
||||||
|
* @param[out] *pDst points to the location where the output result is written. Length srcALen+srcBLen-1.
|
||||||
/**
|
* @param[in] *pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
|
||||||
* @addtogroup Conv
|
* @param[in] *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
|
||||||
* @{
|
* @return none.
|
||||||
*/
|
*
|
||||||
|
* \par Restrictions
|
||||||
/**
|
* If the silicon does not support unaligned memory access enable the macro UNALIGNED_SUPPORT_DISABLE
|
||||||
* @brief Convolution of Q7 sequences.
|
* In this case input, output, scratch1 and scratch2 buffers should be aligned by 32-bit
|
||||||
* @param[in] *pSrcA points to the first input sequence.
|
*
|
||||||
* @param[in] srcALen length of the first input sequence.
|
* @details
|
||||||
* @param[in] *pSrcB points to the second input sequence.
|
* <b>Scaling and Overflow Behavior:</b>
|
||||||
* @param[in] srcBLen length of the second input sequence.
|
*
|
||||||
* @param[out] *pDst points to the location where the output result is written. Length srcALen+srcBLen-1.
|
* \par
|
||||||
* @param[in] *pScratch1 points to scratch buffer(of type q15_t) of size max(srcALen, srcBLen) + 2*min(srcALen, srcBLen) - 2.
|
* The function is implemented using a 32-bit internal accumulator.
|
||||||
* @param[in] *pScratch2 points to scratch buffer (of type q15_t) of size min(srcALen, srcBLen).
|
* Both the inputs are represented in 1.7 format and multiplications yield a 2.14 result.
|
||||||
* @return none.
|
* The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.
|
||||||
*
|
* This approach provides 17 guard bits and there is no risk of overflow as long as <code>max(srcALen, srcBLen)<131072</code>.
|
||||||
* \par Restrictions
|
* The 18.14 result is then truncated to 18.7 format by discarding the low 7 bits and then saturated to 1.7 format.
|
||||||
* If the silicon does not support unaligned memory access enable the macro UNALIGNED_SUPPORT_DISABLE
|
*
|
||||||
* In this case input, output, scratch1 and scratch2 buffers should be aligned by 32-bit
|
*/
|
||||||
*
|
|
||||||
* @details
|
void arm_conv_opt_q7(
|
||||||
* <b>Scaling and Overflow Behavior:</b>
|
q7_t * pSrcA,
|
||||||
*
|
uint32_t srcALen,
|
||||||
* \par
|
q7_t * pSrcB,
|
||||||
* The function is implemented using a 32-bit internal accumulator.
|
uint32_t srcBLen,
|
||||||
* Both the inputs are represented in 1.7 format and multiplications yield a 2.14 result.
|
q7_t * pDst,
|
||||||
* The 2.14 intermediate results are accumulated in a 32-bit accumulator in 18.14 format.
|
q15_t * pScratch1,
|
||||||
* This approach provides 17 guard bits and there is no risk of overflow as long as <code>max(srcALen, srcBLen)<131072</code>.
|
q15_t * pScratch2)
|
||||||
* The 18.14 result is then truncated to 18.7 format by discarding the low 7 bits and then saturated to 1.7 format.
|
{
|
||||||
*
|
|
||||||
*/
|
q15_t *pScr2, *pScr1; /* Intermediate pointers for scratch pointers */
|
||||||
|
q15_t x4; /* Temporary input variable */
|
||||||
void arm_conv_opt_q7(
|
q7_t *pIn1, *pIn2; /* inputA and inputB pointer */
|
||||||
q7_t * pSrcA,
|
uint32_t j, k, blkCnt, tapCnt; /* loop counter */
|
||||||
uint32_t srcALen,
|
q7_t *px; /* Temporary input1 pointer */
|
||||||
q7_t * pSrcB,
|
q15_t *py; /* Temporary input2 pointer */
|
||||||
uint32_t srcBLen,
|
q31_t acc0, acc1, acc2, acc3; /* Accumulator */
|
||||||
q7_t * pDst,
|
q31_t x1, x2, x3, y1; /* Temporary input variables */
|
||||||
q15_t * pScratch1,
|
q7_t *pOut = pDst; /* output pointer */
|
||||||
q15_t * pScratch2)
|
q7_t out0, out1, out2, out3; /* temporary variables */
|
||||||
{
|
|
||||||
|
/* The algorithm implementation is based on the lengths of the inputs. */
|
||||||
q15_t *pScr2, *pScr1; /* Intermediate pointers for scratch pointers */
|
/* srcB is always made to slide across srcA. */
|
||||||
q15_t x4; /* Temporary input variable */
|
/* So srcBLen is always considered as shorter or equal to srcALen */
|
||||||
q7_t *pIn1, *pIn2; /* inputA and inputB pointer */
|
if (srcALen >= srcBLen)
|
||||||
uint32_t j, k, blkCnt, tapCnt; /* loop counter */
|
{
|
||||||
q7_t *px; /* Temporary input1 pointer */
|
/* Initialization of inputA pointer */
|
||||||
q15_t *py; /* Temporary input2 pointer */
|
pIn1 = pSrcA;
|
||||||
q31_t acc0, acc1, acc2, acc3; /* Accumulator */
|
|
||||||
q31_t x1, x2, x3, y1; /* Temporary input variables */
|
/* Initialization of inputB pointer */
|
||||||
q7_t *pOut = pDst; /* output pointer */
|
pIn2 = pSrcB;
|
||||||
q7_t out0, out1, out2, out3; /* temporary variables */
|
}
|
||||||
|
else
|
||||||
/* The algorithm implementation is based on the lengths of the inputs. */
|
{
|
||||||
/* srcB is always made to slide across srcA. */
|
/* Initialization of inputA pointer */
|
||||||
/* So srcBLen is always considered as shorter or equal to srcALen */
|
pIn1 = pSrcB;
|
||||||
if(srcALen >= srcBLen)
|
|
||||||
{
|
/* Initialization of inputB pointer */
|
||||||
/* Initialization of inputA pointer */
|
pIn2 = pSrcA;
|
||||||
pIn1 = pSrcA;
|
|
||||||
|
/* srcBLen is always considered as shorter or equal to srcALen */
|
||||||
/* Initialization of inputB pointer */
|
j = srcBLen;
|
||||||
pIn2 = pSrcB;
|
srcBLen = srcALen;
|
||||||
}
|
srcALen = j;
|
||||||
else
|
}
|
||||||
{
|
|
||||||
/* Initialization of inputA pointer */
|
/* pointer to take end of scratch2 buffer */
|
||||||
pIn1 = pSrcB;
|
pScr2 = pScratch2;
|
||||||
|
|
||||||
/* Initialization of inputB pointer */
|
/* points to smaller length sequence */
|
||||||
pIn2 = pSrcA;
|
px = pIn2 + srcBLen - 1;
|
||||||
|
|
||||||
/* srcBLen is always considered as shorter or equal to srcALen */
|
/* Apply loop unrolling and do 4 Copies simultaneously. */
|
||||||
j = srcBLen;
|
k = srcBLen >> 2U;
|
||||||
srcBLen = srcALen;
|
|
||||||
srcALen = j;
|
/* First part of the processing with loop unrolling copies 4 data points at a time.
|
||||||
}
|
** a second loop below copies for the remaining 1 to 3 samples. */
|
||||||
|
while (k > 0U)
|
||||||
/* pointer to take end of scratch2 buffer */
|
{
|
||||||
pScr2 = pScratch2;
|
/* copy second buffer in reversal manner */
|
||||||
|
x4 = (q15_t) * px--;
|
||||||
/* points to smaller length sequence */
|
*pScr2++ = x4;
|
||||||
px = pIn2 + srcBLen - 1;
|
x4 = (q15_t) * px--;
|
||||||
|
*pScr2++ = x4;
|
||||||
/* Apply loop unrolling and do 4 Copies simultaneously. */
|
x4 = (q15_t) * px--;
|
||||||
k = srcBLen >> 2u;
|
*pScr2++ = x4;
|
||||||
|
x4 = (q15_t) * px--;
|
||||||
/* First part of the processing with loop unrolling copies 4 data points at a time.
|
*pScr2++ = x4;
|
||||||
** a second loop below copies for the remaining 1 to 3 samples. */
|
|
||||||
while(k > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
k--;
|
||||||
/* copy second buffer in reversal manner */
|
}
|
||||||
x4 = (q15_t) * px--;
|
|
||||||
*pScr2++ = x4;
|
/* If the count is not a multiple of 4, copy remaining samples here.
|
||||||
x4 = (q15_t) * px--;
|
** No loop unrolling is used. */
|
||||||
*pScr2++ = x4;
|
k = srcBLen % 0x4U;
|
||||||
x4 = (q15_t) * px--;
|
|
||||||
*pScr2++ = x4;
|
while (k > 0U)
|
||||||
x4 = (q15_t) * px--;
|
{
|
||||||
*pScr2++ = x4;
|
/* copy second buffer in reversal manner for remaining samples */
|
||||||
|
x4 = (q15_t) * px--;
|
||||||
/* Decrement the loop counter */
|
*pScr2++ = x4;
|
||||||
k--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
k--;
|
||||||
/* If the count is not a multiple of 4, copy remaining samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
k = srcBLen % 0x4u;
|
/* Initialze temporary scratch pointer */
|
||||||
|
pScr1 = pScratch1;
|
||||||
while(k > 0u)
|
|
||||||
{
|
/* Fill (srcBLen - 1U) zeros in scratch buffer */
|
||||||
/* copy second buffer in reversal manner for remaining samples */
|
arm_fill_q15(0, pScr1, (srcBLen - 1U));
|
||||||
x4 = (q15_t) * px--;
|
|
||||||
*pScr2++ = x4;
|
/* Update temporary scratch pointer */
|
||||||
|
pScr1 += (srcBLen - 1U);
|
||||||
/* Decrement the loop counter */
|
|
||||||
k--;
|
/* Copy (srcALen) samples in scratch buffer */
|
||||||
}
|
/* Apply loop unrolling and do 4 Copies simultaneously. */
|
||||||
|
k = srcALen >> 2U;
|
||||||
/* Initialze temporary scratch pointer */
|
|
||||||
pScr1 = pScratch1;
|
/* First part of the processing with loop unrolling copies 4 data points at a time.
|
||||||
|
** a second loop below copies for the remaining 1 to 3 samples. */
|
||||||
/* Fill (srcBLen - 1u) zeros in scratch buffer */
|
while (k > 0U)
|
||||||
arm_fill_q15(0, pScr1, (srcBLen - 1u));
|
{
|
||||||
|
/* copy second buffer in reversal manner */
|
||||||
/* Update temporary scratch pointer */
|
x4 = (q15_t) * pIn1++;
|
||||||
pScr1 += (srcBLen - 1u);
|
*pScr1++ = x4;
|
||||||
|
x4 = (q15_t) * pIn1++;
|
||||||
/* Copy (srcALen) samples in scratch buffer */
|
*pScr1++ = x4;
|
||||||
/* Apply loop unrolling and do 4 Copies simultaneously. */
|
x4 = (q15_t) * pIn1++;
|
||||||
k = srcALen >> 2u;
|
*pScr1++ = x4;
|
||||||
|
x4 = (q15_t) * pIn1++;
|
||||||
/* First part of the processing with loop unrolling copies 4 data points at a time.
|
*pScr1++ = x4;
|
||||||
** a second loop below copies for the remaining 1 to 3 samples. */
|
|
||||||
while(k > 0u)
|
/* Decrement the loop counter */
|
||||||
{
|
k--;
|
||||||
/* copy second buffer in reversal manner */
|
}
|
||||||
x4 = (q15_t) * pIn1++;
|
|
||||||
*pScr1++ = x4;
|
/* If the count is not a multiple of 4, copy remaining samples here.
|
||||||
x4 = (q15_t) * pIn1++;
|
** No loop unrolling is used. */
|
||||||
*pScr1++ = x4;
|
k = srcALen % 0x4U;
|
||||||
x4 = (q15_t) * pIn1++;
|
|
||||||
*pScr1++ = x4;
|
while (k > 0U)
|
||||||
x4 = (q15_t) * pIn1++;
|
{
|
||||||
*pScr1++ = x4;
|
/* copy second buffer in reversal manner for remaining samples */
|
||||||
|
x4 = (q15_t) * pIn1++;
|
||||||
/* Decrement the loop counter */
|
*pScr1++ = x4;
|
||||||
k--;
|
|
||||||
}
|
/* Decrement the loop counter */
|
||||||
|
k--;
|
||||||
/* If the count is not a multiple of 4, copy remaining samples here.
|
}
|
||||||
** No loop unrolling is used. */
|
|
||||||
k = srcALen % 0x4u;
|
#ifndef UNALIGNED_SUPPORT_DISABLE
|
||||||
|
|
||||||
while(k > 0u)
|
/* Fill (srcBLen - 1U) zeros at end of scratch buffer */
|
||||||
{
|
arm_fill_q15(0, pScr1, (srcBLen - 1U));
|
||||||
/* copy second buffer in reversal manner for remaining samples */
|
|
||||||
x4 = (q15_t) * pIn1++;
|
/* Update pointer */
|
||||||
*pScr1++ = x4;
|
pScr1 += (srcBLen - 1U);
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
#else
|
||||||
k--;
|
|
||||||
}
|
/* Apply loop unrolling and do 4 Copies simultaneously. */
|
||||||
|
k = (srcBLen - 1U) >> 2U;
|
||||||
#ifndef UNALIGNED_SUPPORT_DISABLE
|
|
||||||
|
/* First part of the processing with loop unrolling copies 4 data points at a time.
|
||||||
/* Fill (srcBLen - 1u) zeros at end of scratch buffer */
|
** a second loop below copies for the remaining 1 to 3 samples. */
|
||||||
arm_fill_q15(0, pScr1, (srcBLen - 1u));
|
while (k > 0U)
|
||||||
|
{
|
||||||
/* Update pointer */
|
/* copy second buffer in reversal manner */
|
||||||
pScr1 += (srcBLen - 1u);
|
*pScr1++ = 0;
|
||||||
|
*pScr1++ = 0;
|
||||||
#else
|
*pScr1++ = 0;
|
||||||
|
*pScr1++ = 0;
|
||||||
/* Apply loop unrolling and do 4 Copies simultaneously. */
|
|
||||||
k = (srcBLen - 1u) >> 2u;
|
/* Decrement the loop counter */
|
||||||
|
k--;
|
||||||
/* First part of the processing with loop unrolling copies 4 data points at a time.
|
}
|
||||||
** a second loop below copies for the remaining 1 to 3 samples. */
|
|
||||||
while(k > 0u)
|
/* If the count is not a multiple of 4, copy remaining samples here.
|
||||||
{
|
** No loop unrolling is used. */
|
||||||
/* copy second buffer in reversal manner */
|
k = (srcBLen - 1U) % 0x4U;
|
||||||
*pScr1++ = 0;
|
|
||||||
*pScr1++ = 0;
|
while (k > 0U)
|
||||||
*pScr1++ = 0;
|
{
|
||||||
*pScr1++ = 0;
|
/* copy second buffer in reversal manner for remaining samples */
|
||||||
|
*pScr1++ = 0;
|
||||||
/* Decrement the loop counter */
|
|
||||||
k--;
|
/* Decrement the loop counter */
|
||||||
}
|
k--;
|
||||||
|
}
|
||||||
/* If the count is not a multiple of 4, copy remaining samples here.
|
|
||||||
** No loop unrolling is used. */
|
#endif
|
||||||
k = (srcBLen - 1u) % 0x4u;
|
|
||||||
|
/* Temporary pointer for scratch2 */
|
||||||
while(k > 0u)
|
py = pScratch2;
|
||||||
{
|
|
||||||
/* copy second buffer in reversal manner for remaining samples */
|
/* Initialization of pIn2 pointer */
|
||||||
*pScr1++ = 0;
|
pIn2 = (q7_t *) py;
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
pScr2 = py;
|
||||||
k--;
|
|
||||||
}
|
/* Actual convolution process starts here */
|
||||||
|
blkCnt = (srcALen + srcBLen - 1U) >> 2;
|
||||||
#endif
|
|
||||||
|
while (blkCnt > 0)
|
||||||
/* Temporary pointer for scratch2 */
|
{
|
||||||
py = pScratch2;
|
/* Initialze temporary scratch pointer as scratch1 */
|
||||||
|
pScr1 = pScratch1;
|
||||||
/* Initialization of pIn2 pointer */
|
|
||||||
pIn2 = (q7_t *) py;
|
/* Clear Accumlators */
|
||||||
|
acc0 = 0;
|
||||||
pScr2 = py;
|
acc1 = 0;
|
||||||
|
acc2 = 0;
|
||||||
/* Actual convolution process starts here */
|
acc3 = 0;
|
||||||
blkCnt = (srcALen + srcBLen - 1u) >> 2;
|
|
||||||
|
/* Read two samples from scratch1 buffer */
|
||||||
while(blkCnt > 0)
|
x1 = *__SIMD32(pScr1)++;
|
||||||
{
|
|
||||||
/* Initialze temporary scratch pointer as scratch1 */
|
/* Read next two samples from scratch1 buffer */
|
||||||
pScr1 = pScratch1;
|
x2 = *__SIMD32(pScr1)++;
|
||||||
|
|
||||||
/* Clear Accumlators */
|
tapCnt = (srcBLen) >> 2U;
|
||||||
acc0 = 0;
|
|
||||||
acc1 = 0;
|
while (tapCnt > 0U)
|
||||||
acc2 = 0;
|
{
|
||||||
acc3 = 0;
|
|
||||||
|
/* Read four samples from smaller buffer */
|
||||||
/* Read two samples from scratch1 buffer */
|
y1 = _SIMD32_OFFSET(pScr2);
|
||||||
x1 = *__SIMD32(pScr1)++;
|
|
||||||
|
/* multiply and accumlate */
|
||||||
/* Read next two samples from scratch1 buffer */
|
acc0 = __SMLAD(x1, y1, acc0);
|
||||||
x2 = *__SIMD32(pScr1)++;
|
acc2 = __SMLAD(x2, y1, acc2);
|
||||||
|
|
||||||
tapCnt = (srcBLen) >> 2u;
|
/* pack input data */
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
while(tapCnt > 0u)
|
x3 = __PKHBT(x2, x1, 0);
|
||||||
{
|
#else
|
||||||
|
x3 = __PKHBT(x1, x2, 0);
|
||||||
/* Read four samples from smaller buffer */
|
#endif
|
||||||
y1 = _SIMD32_OFFSET(pScr2);
|
|
||||||
|
/* multiply and accumlate */
|
||||||
/* multiply and accumlate */
|
acc1 = __SMLADX(x3, y1, acc1);
|
||||||
acc0 = __SMLAD(x1, y1, acc0);
|
|
||||||
acc2 = __SMLAD(x2, y1, acc2);
|
/* Read next two samples from scratch1 buffer */
|
||||||
|
x1 = *__SIMD32(pScr1)++;
|
||||||
/* pack input data */
|
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
/* pack input data */
|
||||||
x3 = __PKHBT(x2, x1, 0);
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
#else
|
x3 = __PKHBT(x1, x2, 0);
|
||||||
x3 = __PKHBT(x1, x2, 0);
|
#else
|
||||||
#endif
|
x3 = __PKHBT(x2, x1, 0);
|
||||||
|
#endif
|
||||||
/* multiply and accumlate */
|
|
||||||
acc1 = __SMLADX(x3, y1, acc1);
|
acc3 = __SMLADX(x3, y1, acc3);
|
||||||
|
|
||||||
/* Read next two samples from scratch1 buffer */
|
/* Read four samples from smaller buffer */
|
||||||
x1 = *__SIMD32(pScr1)++;
|
y1 = _SIMD32_OFFSET(pScr2 + 2U);
|
||||||
|
|
||||||
/* pack input data */
|
acc0 = __SMLAD(x2, y1, acc0);
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
|
||||||
x3 = __PKHBT(x1, x2, 0);
|
acc2 = __SMLAD(x1, y1, acc2);
|
||||||
#else
|
|
||||||
x3 = __PKHBT(x2, x1, 0);
|
acc1 = __SMLADX(x3, y1, acc1);
|
||||||
#endif
|
|
||||||
|
x2 = *__SIMD32(pScr1)++;
|
||||||
acc3 = __SMLADX(x3, y1, acc3);
|
|
||||||
|
#ifndef ARM_MATH_BIG_ENDIAN
|
||||||
/* Read four samples from smaller buffer */
|
x3 = __PKHBT(x2, x1, 0);
|
||||||
y1 = _SIMD32_OFFSET(pScr2 + 2u);
|
#else
|
||||||
|
x3 = __PKHBT(x1, x2, 0);
|
||||||
acc0 = __SMLAD(x2, y1, acc0);
|
#endif
|
||||||
|
|
||||||
acc2 = __SMLAD(x1, y1, acc2);
|
acc3 = __SMLADX(x3, y1, acc3);
|
||||||
|
|
||||||
acc1 = __SMLADX(x3, y1, acc1);
|
pScr2 += 4U;
|
||||||
|
|
||||||
x2 = *__SIMD32(pScr1)++;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
#ifndef ARM_MATH_BIG_ENDIAN
|
tapCnt--;
|
||||||
x3 = __PKHBT(x2, x1, 0);
|
}
|
||||||
#else
|
|
||||||
x3 = __PKHBT(x1, x2, 0);
|
|
||||||
#endif
|
|
||||||
|
/* Update scratch pointer for remaining samples of smaller length sequence */
|
||||||
acc3 = __SMLADX(x3, y1, acc3);
|
pScr1 -= 4U;
|
||||||
|
|
||||||
pScr2 += 4u;
|
|
||||||
|
/* apply same above for remaining samples of smaller length sequence */
|
||||||
|
tapCnt = (srcBLen) & 3U;
|
||||||
/* Decrement the loop counter */
|
|
||||||
tapCnt--;
|
while (tapCnt > 0U)
|
||||||
}
|
{
|
||||||
|
|
||||||
|
/* accumlate the results */
|
||||||
|
acc0 += (*pScr1++ * *pScr2);
|
||||||
/* Update scratch pointer for remaining samples of smaller length sequence */
|
acc1 += (*pScr1++ * *pScr2);
|
||||||
pScr1 -= 4u;
|
acc2 += (*pScr1++ * *pScr2);
|
||||||
|
acc3 += (*pScr1++ * *pScr2++);
|
||||||
|
|
||||||
/* apply same above for remaining samples of smaller length sequence */
|
pScr1 -= 3U;
|
||||||
tapCnt = (srcBLen) & 3u;
|
|
||||||
|
/* Decrement the loop counter */
|
||||||
while(tapCnt > 0u)
|
tapCnt--;
|
||||||
{
|
}
|
||||||
|
|
||||||
/* accumlate the results */
|
blkCnt--;
|
||||||
acc0 += (*pScr1++ * *pScr2);
|
|
||||||
acc1 += (*pScr1++ * *pScr2);
|
/* Store the result in the accumulator in the destination buffer. */
|
||||||
acc2 += (*pScr1++ * *pScr2);
|
out0 = (q7_t) (__SSAT(acc0 >> 7U, 8));
|
||||||
acc3 += (*pScr1++ * *pScr2++);
|
out1 = (q7_t) (__SSAT(acc1 >> 7U, 8));
|
||||||
|
out2 = (q7_t) (__SSAT(acc2 >> 7U, 8));
|
||||||
pScr1 -= 3u;
|
out3 = (q7_t) (__SSAT(acc3 >> 7U, 8));
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
*__SIMD32(pOut)++ = __PACKq7(out0, out1, out2, out3);
|
||||||
tapCnt--;
|
|
||||||
}
|
/* Initialization of inputB pointer */
|
||||||
|
pScr2 = py;
|
||||||
blkCnt--;
|
|
||||||
|
pScratch1 += 4U;
|
||||||
/* Store the result in the accumulator in the destination buffer. */
|
|
||||||
out0 = (q7_t) (__SSAT(acc0 >> 7u, 8));
|
}
|
||||||
out1 = (q7_t) (__SSAT(acc1 >> 7u, 8));
|
|
||||||
out2 = (q7_t) (__SSAT(acc2 >> 7u, 8));
|
|
||||||
out3 = (q7_t) (__SSAT(acc3 >> 7u, 8));
|
blkCnt = (srcALen + srcBLen - 1U) & 0x3;
|
||||||
|
|
||||||
*__SIMD32(pOut)++ = __PACKq7(out0, out1, out2, out3);
|
/* Calculate convolution for remaining samples of Bigger length sequence */
|
||||||
|
while (blkCnt > 0)
|
||||||
/* Initialization of inputB pointer */
|
{
|
||||||
pScr2 = py;
|
/* Initialze temporary scratch pointer as scratch1 */
|
||||||
|
pScr1 = pScratch1;
|
||||||
pScratch1 += 4u;
|
|
||||||
|
/* Clear Accumlators */
|
||||||
}
|
acc0 = 0;
|
||||||
|
|
||||||
|
tapCnt = (srcBLen) >> 1U;
|
||||||
blkCnt = (srcALen + srcBLen - 1u) & 0x3;
|
|
||||||
|
while (tapCnt > 0U)
|
||||||
/* Calculate convolution for remaining samples of Bigger length sequence */
|
{
|
||||||
while(blkCnt > 0)
|
acc0 += (*pScr1++ * *pScr2++);
|
||||||
{
|
acc0 += (*pScr1++ * *pScr2++);
|
||||||
/* Initialze temporary scratch pointer as scratch1 */
|
|
||||||
pScr1 = pScratch1;
|
/* Decrement the loop counter */
|
||||||
|
tapCnt--;
|
||||||
/* Clear Accumlators */
|
}
|
||||||
acc0 = 0;
|
|
||||||
|
tapCnt = (srcBLen) & 1U;
|
||||||
tapCnt = (srcBLen) >> 1u;
|
|
||||||
|
/* apply same above for remaining samples of smaller length sequence */
|
||||||
while(tapCnt > 0u)
|
while (tapCnt > 0U)
|
||||||
{
|
{
|
||||||
acc0 += (*pScr1++ * *pScr2++);
|
|
||||||
acc0 += (*pScr1++ * *pScr2++);
|
/* accumlate the results */
|
||||||
|
acc0 += (*pScr1++ * *pScr2++);
|
||||||
/* Decrement the loop counter */
|
|
||||||
tapCnt--;
|
/* Decrement the loop counter */
|
||||||
}
|
tapCnt--;
|
||||||
|
}
|
||||||
tapCnt = (srcBLen) & 1u;
|
|
||||||
|
blkCnt--;
|
||||||
/* apply same above for remaining samples of smaller length sequence */
|
|
||||||
while(tapCnt > 0u)
|
/* Store the result in the accumulator in the destination buffer. */
|
||||||
{
|
*pOut++ = (q7_t) (__SSAT(acc0 >> 7U, 8));
|
||||||
|
|
||||||
/* accumlate the results */
|
/* Initialization of inputB pointer */
|
||||||
acc0 += (*pScr1++ * *pScr2++);
|
pScr2 = py;
|
||||||
|
|
||||||
/* Decrement the loop counter */
|
pScratch1 += 1U;
|
||||||
tapCnt--;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
blkCnt--;
|
}
|
||||||
|
|
||||||
/* Store the result in the accumulator in the destination buffer. */
|
|
||||||
*pOut++ = (q7_t) (__SSAT(acc0 >> 7u, 8));
|
/**
|
||||||
|
* @} end of Conv group
|
||||||
/* Initialization of inputB pointer */
|
*/
|
||||||
pScr2 = py;
|
|
||||||
|
|
||||||
pScratch1 += 1u;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @} end of Conv group
|
|
||||||
*/
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue