q15_t testInput[64];
q15_t testOutput[32];
q15_t ADC_In[32]{100,200,300,400,100,200,300,400,100,200,300,400,100,200,300,400,
100,200,300,400,100,200,300,400,100,200,300,400,100,200,300,400};
void FFTPro(void)
{
//存储为复数格式的输入表格 q15格式的32点复数FFT
for(i=0; i<32; i++)
{
testInput[i*2+1] = 0;
testInput[i*2] = TestIn[i];
}
arm_cfft_q15(&arm_cfft_sR_q15_len32,testInput,0, 1);
}
const arm_cfft_instance_q15 arm_cfft_sR_q15_len32 =
{
32, twiddleCoef_32_q15, armBitRevIndexTable_fixed_32, ARMBITREVINDEXTABLE_FIXED___32_TABLE_LENGTH
};
const q15_t twiddleCoef_32_q15[48] = {
(q15_t)0x7FFF, (q15_t)0x0000,
(q15_t)0x7D8A, (q15_t)0x18F8,
(q15_t)0x7641, (q15_t)0x30FB,
(q15_t)0x6A6D, (q15_t)0x471C,
(q15_t)0x5A82, (q15_t)0x5A82,
(q15_t)0x471C, (q15_t)0x6A6D,
(q15_t)0x30FB, (q15_t)0x7641,
(q15_t)0x18F8, (q15_t)0x7D8A,
(q15_t)0x0000, (q15_t)0x7FFF,
(q15_t)0xE707, (q15_t)0x7D8A,
(q15_t)0xCF04, (q15_t)0x7641,
(q15_t)0xB8E3, (q15_t)0x6A6D,
(q15_t)0xA57D, (q15_t)0x5A82,
(q15_t)0x9592, (q15_t)0x471C,
(q15_t)0x89BE, (q15_t)0x30FB,
(q15_t)0x8275, (q15_t)0x18F8,
(q15_t)0x8000, (q15_t)0x0000,
(q15_t)0x8275, (q15_t)0xE707,
(q15_t)0x89BE, (q15_t)0xCF04,
(q15_t)0x9592, (q15_t)0xB8E3,
(q15_t)0xA57D, (q15_t)0xA57D,
(q15_t)0xB8E3, (q15_t)0x9592,
(q15_t)0xCF04, (q15_t)0x89BE,
(q15_t)0xE707, (q15_t)0x8275
};
const uint16_t armBitRevIndexTable_fixed_32[ARMBITREVINDEXTABLE_FIXED___32_TABLE_LENGTH] =
{
//4x2, size 24
8,128, 16,64, 24,192, 40,160, 48,96, 56,224, 72,144,
88,208, 104,176, 120,240, 152,200, 184,232
};
#define ARMBITREVINDEXTABLE_FIXED___32_TABLE_LENGTH ((uint16_t)24 )
void arm_cfft_q15( const arm_cfft_instance_q15 * S, q15_t * p1,uint8_t ifftFlag,uint8_t bitReverseFlag)
{
uint32_t L = S->fftLen;
switch (L)
{
case 16:
case 64:
case 256:
case 1024:
case 4096: arm_radix4_butterfly_q15 ( p1, L, (q15_t*)S->pTwiddle, 1 );
break;
case 32:
case 128:
case 512:
case 2048: arm_cfft_radix4by2_q15 ( p1, L, S->pTwiddle );
break;
if( bitReverseFlag )
arm_bitreversal_16((uint16_t*)p1,S->bitRevLength,S->pBitRevTable);
}
/////////////////////////////////////
void arm_cfft_radix4by2_q15( q15_t * pSrc, uint32_t fftLen, const q15_t * pCoef)
{
uint32_t i;
uint32_t n2;
q15_t p0, p1, p2, p3;
q31_t T, S, R;
q31_t coeff, out1, out2;
const q15_t *pC = pCoef;
q15_t *pSi = pSrc;
q15_t *pSl = pSrc + fftLen;
n2 = fftLen >> 1;
for (i = n2; i > 0; i--)//连续计算
{
coeff = _SIMD32_OFFSET(pC);
pC += 2;
T = _SIMD32_OFFSET(pSi);
T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
S = _SIMD32_OFFSET(pSl);
S = __SHADD16(S, 0); // this is just a SIMD arithmetic shift right by 1
R = __QSUB16(T, S);
_SIMD32_OFFSET(pSi) = __SHADD16(T, S);
pSi += 2;
out1 = __SMUAD(coeff, R) >> 16;
out2 = __SMUSDX(coeff, R);
_SIMD32_OFFSET(pSl) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSl += 2;
}
// first col
arm_radix4_butterfly_q15( pSrc, n2, (q15_t*)pCoef, 2u);
// second col
arm_radix4_butterfly_q15( pSrc + fftLen, n2, (q15_t*)pCoef, 2u);
for (i = 0; i < fftLen >> 1; i++)
{
p0 = pSrc[4*i+0];
p1 = pSrc[4*i+1];
p2 = pSrc[4*i+2];
p3 = pSrc[4*i+3];
p0 <<= 1;
p1 <<= 1;
p2 <<= 1;
p3 <<= 1;
pSrc[4*i+0] = p0;
pSrc[4*i+1] = p1;
pSrc[4*i+2] = p2;
pSrc[4*i+3] = p3;
}
}
void arm_radix4_butterfly_q15( q15_t * pSrc16,uint32_t fftLen,q15_t * pCoef16,uint32_t twidCoefModifier)
{
q31_t R, S, T, U;
q31_t C1, C2, C3, out1, out2;
uint32_t n1, n2, ic, i0, j, k;
q15_t *ptr1;
q15_t *pSi0;
q15_t *pSi1;
q15_t *pSi2;
q15_t *pSi3;
q31_t xaya, xbyb, xcyc, xdyd;
/* Total process is divided into three stages */
/* process first stage, middle stages, & last stage */
/* Initializations for the first stage */
n2 = fftLen;
n1 = n2;
/* n2 = fftLen/4 */
n2 >>= 2u;
/* Index for twiddle coefficient */
ic = 0u;
/* Index for input read and output write */
j = n2;
pSi0 = pSrc16;
pSi1 = pSi0 + 2 * n2;
pSi2 = pSi1 + 2 * n2;
pSi3 = pSi2 + 2 * n2;
/* Input is in 1.15(q15) format */
/* start of first stage process */
do
{
/* Butterfly implementation */
/* Reading i0, i0+fftLen/2 inputs */
/* Read ya (real), xa(imag) input */
T = _SIMD32_OFFSET(pSi0);
T = __SHADD16(T, 0); // this is just a SIMD arithmetic shift right by 1
T = __SHADD16(T, 0); // it turns out doing this twice is 2 cycles, the alternative takes 3 cycles
//in = ((int16_t) (T & 0xFFFF)) >> 2; // alternative code that takes 3 cycles
//T = ((T >> 2) & 0xFFFF0000) | (in & 0xFFFF);
/* Read yc (real), xc(imag) input */
S = _SIMD32_OFFSET(pSi2);
S = __SHADD16(S, 0);
S = __SHADD16(S, 0);
/* R = packed((ya + yc), (xa + xc) ) */
R = __QADD16(T, S);
/* S = packed((ya - yc), (xa - xc) ) */
S = __QSUB16(T, S);
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
/* Read yb (real), xb(imag) input */
T = _SIMD32_OFFSET(pSi1);
T = __SHADD16(T, 0);
T = __SHADD16(T, 0);
/* Read yd (real), xd(imag) input */
U = _SIMD32_OFFSET(pSi3);
U = __SHADD16(U, 0);
U = __SHADD16(U, 0);
/* T = packed((yb + yd), (xb + xd) ) */
T = __QADD16(T, U);
/* writing the butterfly processed i0 sample */
/* xa' = xa + xb + xc + xd */
/* ya' = ya + yb + yc + yd */
_SIMD32_OFFSET(pSi0) = __SHADD16(R, T);
pSi0 += 2;
/* R = packed((ya + yc) - (yb + yd), (xa + xc)- (xb + xd)) */
R = __QSUB16(R, T);
/* co2 & si2 are read from SIMD Coefficient pointer */
C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
//#ifndef ARM_MATH_BIG_ENDIAN
/* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
out1 = __SMUAD(C2, R) >> 16u;
/* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
out2 = __SMUSDX(C2, R);
/* Reading i0+fftLen/4 */
/* T = packed(yb, xb) */
T = _SIMD32_OFFSET(pSi1);
T = __SHADD16(T, 0);
T = __SHADD16(T, 0);
/* writing the butterfly processed i0 + fftLen/4 sample */
/* writing output(xc', yc') in little endian format */
_SIMD32_OFFSET(pSi1) =
(q31_t) ((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSi1 += 2;
/* Butterfly calculations */
/* U = packed(yd, xd) */
U = _SIMD32_OFFSET(pSi3);
U = __SHADD16(U, 0);
U = __SHADD16(U, 0);
/* T = packed(yb-yd, xb-xd) */
T = __QSUB16(T, U);
/* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
R = __QASX(S, T);
/* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
S = __QSAX(S, T);
/* co1 & si1 are read from SIMD Coefficient pointer */
C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
/* Butterfly process for the i0+fftLen/2 sample */
/* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
out1 = __SMUAD(C1, S) >> 16u;
/* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
out2 = __SMUSDX(C1, S);
/* writing output(xb', yb') in little endian format */
_SIMD32_OFFSET(pSi2) =
((out2) & 0xFFFF0000) | ((out1) & 0x0000FFFF);
pSi2 += 2;
/* co3 & si3 are read from SIMD Coefficient pointer */
C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
/* Butterfly process for the i0+3fftLen/4 sample */
//#ifndef ARM_MATH_BIG_ENDIAN
/* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
out1 = __SMUAD(C3, R) >> 16u;
/* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
out2 = __SMUSDX(C3, R);
/* writing output(xd', yd') in little endian format */
_SIMD32_OFFSET(pSi3) =
((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSi3 += 2;
/* Twiddle coefficients index modifier */
ic = ic + twidCoefModifier;
} while(--j);
/* data is in 4.11(q11) format */
/* end of first stage process */
/* start of middle stage process */
/* Twiddle coefficients index modifier */
twidCoefModifier <<= 2u;
/* Calculation of Middle stage */
for (k = fftLen / 4u; k > 4u; k >>= 2u)
{
/* Initializations for the middle stage */
n1 = n2;
n2 >>= 2u;
ic = 0u;
for (j = 0u; j <= (n2 - 1u); j++)
{
/* index calculation for the coefficients */
C1 = _SIMD32_OFFSET(pCoef16 + (2u * ic));
C2 = _SIMD32_OFFSET(pCoef16 + (4u * ic));
C3 = _SIMD32_OFFSET(pCoef16 + (6u * ic));
/* Twiddle coefficients index modifier */
ic = ic + twidCoefModifier;
pSi0 = pSrc16 + 2 * j;
pSi1 = pSi0 + 2 * n2;
pSi2 = pSi1 + 2 * n2;
pSi3 = pSi2 + 2 * n2;
/* Butterfly implementation */
for (i0 = j; i0 < fftLen; i0 += n1)
{
/* Reading i0, i0+fftLen/2 inputs */
/* Read ya (real), xa(imag) input */
T = _SIMD32_OFFSET(pSi0);
/* Read yc (real), xc(imag) input */
S = _SIMD32_OFFSET(pSi2);
/* R = packed( (ya + yc), (xa + xc)) */
R = __QADD16(T, S);
/* S = packed((ya - yc), (xa - xc)) */
S = __QSUB16(T, S);
/* Reading i0+fftLen/4 , i0+3fftLen/4 inputs */
/* Read yb (real), xb(imag) input */
T = _SIMD32_OFFSET(pSi1);
/* Read yd (real), xd(imag) input */
U = _SIMD32_OFFSET(pSi3);
/* T = packed( (yb + yd), (xb + xd)) */
T = __QADD16(T, U);
/* writing the butterfly processed i0 sample */
/* xa' = xa + xb + xc + xd */
/* ya' = ya + yb + yc + yd */
out1 = __SHADD16(R, T);
out1 = __SHADD16(out1, 0);
_SIMD32_OFFSET(pSi0) = out1;
pSi0 += 2 * n1;
/* R = packed( (ya + yc) - (yb + yd), (xa + xc) - (xb + xd)) */
R = __SHSUB16(R, T);
/* (ya-yb+yc-yd)* (si2) + (xa-xb+xc-xd)* co2 */
out1 = __SMUAD(C2, R) >> 16u;
/* (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
out2 = __SMUSDX(C2, R);
/* Reading i0+3fftLen/4 */
/* Read yb (real), xb(imag) input */
T = _SIMD32_OFFSET(pSi1);
/* writing the butterfly processed i0 + fftLen/4 sample */
/* xc' = (xa-xb+xc-xd)* co2 + (ya-yb+yc-yd)* (si2) */
/* yc' = (ya-yb+yc-yd)* co2 - (xa-xb+xc-xd)* (si2) */
_SIMD32_OFFSET(pSi1) =
((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSi1 += 2 * n1;
/* Butterfly calculations */
/* Read yd (real), xd(imag) input */
U = _SIMD32_OFFSET(pSi3);
/* T = packed(yb-yd, xb-xd) */
T = __QSUB16(T, U);
/* R = packed((ya-yc) + (xb- xd) , (xa-xc) - (yb-yd)) */
R = __SHASX(S, T);
/* S = packed((ya-yc) - (xb- xd), (xa-xc) + (yb-yd)) */
S = __SHSAX(S, T);
/* Butterfly process for the i0+fftLen/2 sample */
out1 = __SMUAD(C1, S) >> 16u;
out2 = __SMUSDX(C1, S);
/* xb' = (xa+yb-xc-yd)* co1 + (ya-xb-yc+xd)* (si1) */
/* yb' = (ya-xb-yc+xd)* co1 - (xa+yb-xc-yd)* (si1) */
_SIMD32_OFFSET(pSi2) =
((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSi2 += 2 * n1;
/* Butterfly process for the i0+3fftLen/4 sample */
out1 = __SMUAD(C3, R) >> 16u;
out2 = __SMUSDX(C3, R);
/* xd' = (xa-yb-xc+yd)* co3 + (ya+xb-yc-xd)* (si3) */
/* yd' = (ya+xb-yc-xd)* co3 - (xa-yb-xc+yd)* (si3) */
_SIMD32_OFFSET(pSi3) =
((out2) & 0xFFFF0000) | (out1 & 0x0000FFFF);
pSi3 += 2 * n1;
}
}
/* Twiddle coefficients index modifier */
twidCoefModifier <<= 2u;
}
/* end of middle stage process */
/* data is in 10.6(q6) format for the 1024 point */
/* data is in 8.8(q8) format for the 256 point */
/* data is in 6.10(q10) format for the 64 point */
/* data is in 4.12(q12) format for the 16 point */
/* Initializations for the last stage */
j = fftLen >> 2;
ptr1 = &pSrc16[0];
/* start of last stage process */
/* Butterfly implementation */
do
{
/* Read xa (real), ya(imag) input */
xaya = *__SIMD32(ptr1)++;
/* Read xb (real), yb(imag) input */
xbyb = *__SIMD32(ptr1)++;
/* Read xc (real), yc(imag) input */
xcyc = *__SIMD32(ptr1)++;
/* Read xd (real), yd(imag) input */
xdyd = *__SIMD32(ptr1)++;
/* R = packed((ya + yc), (xa + xc)) */
R = __QADD16(xaya, xcyc);
/* T = packed((yb + yd), (xb + xd)) */
T = __QADD16(xbyb, xdyd);
/* pointer updation for writing */
ptr1 = ptr1 - 8u;
/* xa' = xa + xb + xc + xd */
/* ya' = ya + yb + yc + yd */
*__SIMD32(ptr1)++ = __SHADD16(R, T);
/* T = packed((yb + yd), (xb + xd)) */
T = __QADD16(xbyb, xdyd);
/* xc' = (xa-xb+xc-xd) */
/* yc' = (ya-yb+yc-yd) */
*__SIMD32(ptr1)++ = __SHSUB16(R, T);
/* S = packed((ya - yc), (xa - xc)) */
S = __QSUB16(xaya, xcyc);
/* Read yd (real), xd(imag) input */
/* T = packed( (yb - yd), (xb - xd)) */
U = __QSUB16(xbyb, xdyd);
/* xb' = (xa+yb-xc-yd) */
/* yb' = (ya-xb-yc+xd) */
*__SIMD32(ptr1)++ = __SHSAX(S, U);
/* xd' = (xa-yb-xc+yd) */
/* yd' = (ya+xb-yc-xd) */
*__SIMD32(ptr1)++ = __SHASX(S, U);
} while(--j);
}
.type arm_bitreversal_16, %function
arm_bitreversal_16 PROC
ADDS r3,r1,#1
CMP r3,#1
IT LS
BXLS lr
PUSH {r4-r9}
ADDS r1,r2,#2
LSRS r3,r3,#2
arm_bitreversal_16_0 LABEL ;/* loop unrolled by 2 */
LDRH r8,[r1,#4]
LDRH r9,[r1,#2]
LDRH r2,[r1,#0]
LDRH r12,[r1,#-2]
ADD r8,r0,r8,LSR #1
ADD r9,r0,r9,LSR #1
ADD r2,r0,r2,LSR #1
ADD r12,r0,r12,LSR #1
LDR r7,[r9,#0]
LDR r6,[r8,#0]
LDR r5,[r2,#0]
LDR r4,[r12,#0]
STR r6,[r9,#0]
STR r7,[r8,#0]
STR r5,[r12,#0]
STR r4,[r2,#0]
ADDS r1,r1,#8
SUBS r3,r3,#1
BNE arm_bitreversal_16_0
POP {r4-r9}
BX lr
ENDP
MATLAB m文件代码
N=32;
n=0:N-1;
TestIn=[100 200 300 400 100 200 300 400 100 200 300 400 100 200 300 400 100 200 300 400 100 200 300 400 100 200 300 400 100 200 300 400 ];
Xk=fft(TestIn)
plot(f, abs(Xk)/32);
xlabel('频率');
ylabel('幅度');