// Applies a filtering pass to the input data.
#include
extern uchar* p_in_u8;// input data pointer; declared here, defined in another module
extern uchar* p_out_u8;// output data pointer; declared here, defined in another module
/*
 * Filters u32N rows of data with the 3x3 kernel
 *     1 2 1
 *     2 4 2
 *     1 2 1
 * using the sliding-window multiply/accumulate vector intrinsics.
 *
 * Each iteration loads three consecutive rows through the global p_in_u8
 * (rows are s32SrcStep elements apart), combines them, and stores one result
 * row through the global p_out_u8. Both globals are modified: on return each
 * has been advanced by u32N rows. Because the last iteration still reads two
 * rows past its first one, the input must provide u32N + 2 readable rows.
 *
 * s32SrcStep  distance between consecutive input rows, in uchar elements
 * s32DstStep  distance between consecutive output rows, in uchar elements
 * u32N        number of output rows to produce
 */
void vecc_example(int s32SrcStep, int s32DstStep, uint u32N)
{
    uchar32 v0, v1, v2;      /* three input rows, 32 uchar lanes each */
    ushort16 v3, v_coeff;    /* 16 ushort lanes: result row and kernel coefficients */
    /* Same width as u32N: a ushort counter would wrap at 65536 and never
     * reach a larger bound, looping forever. */
    uint j;
    /* 16 uint lanes initialised from a uchar32 zero vector. Original author's
     * note: this only initialises the first 8 x 32 bits. The value is
     * overwritten by vswmpy5 before it is read in every iteration. */
    uint16 vacc0 = (uchar32) 0;
    /* Kernel rows padded to four entries each, so row k starts at index 4*k.
     * The twelve non-padding coefficients sum to 16. */
    ushort coeff[16] = {1,2,1,0,2,4,2,0,1,2,1,0,0,0,0,0};
    v_coeff = *(ushort16*)coeff; /* load the coefficient array as one vector */
    /* Store predicate for vst. It was previously left uninitialised, which made
     * the set of stored lanes indeterminate.
     * NOTE(review): assumes one predicate bit per output lane, so 0xFFFF
     * enables all 16 lanes -- confirm against the Vec-C reference. */
    unsigned short vprMask = 0xFFFF;

    for (j = 0; j < u32N; ++j)
    {
        /* Load rows j, j+1 and j+2, then step back one row so that the net
         * advance of p_in_u8 per iteration is a single row. */
        v0 = *(uchar32*)p_in_u8;
        p_in_u8 += s32SrcStep;
        v1 = *(uchar32*)p_in_u8;
        p_in_u8 += s32SrcStep;
        v2 = *(uchar32*)p_in_u8;
        p_in_u8 -= s32SrcStep;

        /* Sliding-window multiply of row 0 against the coefficients at
         * offset 0 (first kernel row). Per the original note, the two vector
         * operands are treated as one concatenated data window. */
        vacc0 = (uint16) vswmpy5(v0, v0, 0, v_coeff, 0);
        /* Accumulate row 1 against the coefficients at offset 4. */
        vacc0 = vswmac5(v1, v1, 0, v_coeff, 4, vacc0);
        /* Accumulate row 2 against the coefficients at offset 8.
         * NOTE(review): the trailing 4 is presumably a right shift that
         * divides by the coefficient sum (16) -- confirm. */
        v3 = (ushort16) vswmac5(v2, v2, 0, v_coeff, 8, vacc0, 4);

        /* Store the result row as uchar using the `sat` store mode, under
         * the lane predicate. */
        vst(sat, v3, (uchar16*)p_out_u8, vprMask);
        p_out_u8 += s32DstStep;
    }
}