/* Widest image row, in bytes, that one slot of the line buffers can hold. */
#define MAX_W 2592

/*
 * Input staging area: four consecutive slots of MAX_W bytes, one image row
 * per slot.  Intended to live in L2 SRAM; the placement itself is not
 * visible in this file (presumably done by the linker setup -- TODO confirm).
 */
static unsigned char inBuf[4 * MAX_W];

/*
 * Output staging area: two consecutive slots of MAX_W bytes that are used
 * alternately.  Same L2 SRAM placement assumption as inBuf.
 */
static unsigned char outBuf[2 * MAX_W];
/*
 * Smooth one image row with the 3x3 kernel
 *
 *     1 2 1
 *     2 4 2      result = (weighted sum + 8) / 16
 *     1 2 1
 *
 * in1, in2, in3  Row above, row being smoothed and row below.  outLen bytes
 *                are read from each of them.
 * outLine        Receives outLen bytes.  The first and the last byte are
 *                copied from in2 unfiltered because they have no left/right
 *                neighbour.
 * outLen         Row width in bytes.  Nothing is written when it is <= 0.
 *
 * The main loop produces four pixels per pass with the TI C6000 packed-data
 * intrinsics and needs at least 8 bytes per row; whatever is left (or the
 * whole row when it is shorter than 8 bytes) is handled by a scalar loop.
 */
static void smooth_one_line( const unsigned char* restrict in1, const unsigned char* restrict in2, const unsigned char* restrict in3, unsigned char* outLine, short outLen )
{
    /* The three kernel rows stored back to back.  The zero padding keeps the
     * 4-byte load that starts at &mask[6] inside the array. */
    static const char mask[16] = { 1,2,1, 2,4,2, 1,2,1, 0,0,0,0,0,0,0 };
    short j;
    unsigned int mask1_0, mask2_0, mask3_0;
    unsigned int mask1_1, mask2_1, mask3_1;
    double r1_76543210, r2_76543210, r3_76543210;
    unsigned int r1_7654, r1_3210;
    unsigned int r2_7654, r2_3210;
    unsigned int r3_7654, r3_3210;
    unsigned int r1_5432, r2_5432, r3_5432;
    int sum0, sum1, sum2, sum3;
    unsigned int sum32, sum10, sum3210;

    if( outLen <= 0 )
    {
        return;
    }

    /* Load 4 bytes starting at each kernel row, then shift left 8 bits and
     * right 8 bits so that only the three weights of that row remain. */
    mask1_0 = _extu( _mem4_const(&mask[0]), 8, 8);
    mask2_0 = _extu( _mem4_const(&mask[3]), 8, 8);
    mask3_0 = _extu( _mem4_const(&mask[6]), 8, 8);
    /* Same weights moved up by one byte: they select the window that starts
     * one pixel further along. */
    mask1_1 = mask1_0 << 8;
    mask2_1 = mask2_0 << 8;
    mask3_1 = mask3_0 << 8;

    /* Pixel 0 has no left neighbour and is passed through from in2.  sum3
     * always carries the result for pixel j into the next pass. */
    sum3 = *in2;

    /* Each pass loads 8 bytes at offset j from every row, so j + 7 must
     * still be inside the row. */
    for( j = 0; j < outLen - 7; j += 4 )
    {
        r1_76543210 = _memd8_const(&in1[j]);
        r2_76543210 = _memd8_const(&in2[j]);
        r3_76543210 = _memd8_const(&in3[j]);
        r1_3210 = _lo(r1_76543210);
        r2_3210 = _lo(r2_76543210);
        r3_3210 = _lo(r3_76543210);
        r1_7654 = _hi(r1_76543210);
        r2_7654 = _hi(r2_76543210);
        r3_7654 = _hi(r3_76543210);

        /* sum0 -> pixel j+1, sum1 -> pixel j+2 */
        sum0 = (_dotpsu4(mask1_0, r1_3210) + _dotpsu4(mask2_0, r2_3210)
              + _dotpsu4(mask3_0, r3_3210) + 8 ) >> 4;
        sum1 = (_dotpsu4(mask1_1, r1_3210) + _dotpsu4(mask2_1, r2_3210)
              + _dotpsu4(mask3_1, r3_3210) + 8 ) >> 4;
        /* Low half is the pixel-j result carried over from the previous pass. */
        sum10 = _spack2(sum0, sum3);

        /* Bytes 2..5 of the 8 loaded bytes. */
        r1_5432 = _packlh2(r1_7654, r1_3210);
        r2_5432 = _packlh2(r2_7654, r2_3210);
        r3_5432 = _packlh2(r3_7654, r3_3210);

        /* sum2 -> pixel j+3, sum3 -> pixel j+4 (stored by the next pass) */
        sum2 = (_dotpsu4(mask1_0, r1_5432) + _dotpsu4(mask2_0, r2_5432)
              + _dotpsu4(mask3_0, r3_5432) + 8 ) >> 4;
        sum3 = (_dotpsu4(mask1_1, r1_5432) + _dotpsu4(mask2_1, r2_5432)
              + _dotpsu4(mask3_1, r3_5432) + 8 ) >> 4;
        sum32 = _spack2(sum2, sum1);

        /* Pack to four unsigned bytes and store pixels j .. j+3. */
        sum3210 = _spacku4(sum32, sum10);
        _mem4(&outLine[j]) = sum3210;
    }

    /* Pixel j is still pending in sum3: either the value computed by the
     * last vector pass or, when no pass ran, the pass-through of pixel 0. */
    outLine[j] = (unsigned char)sum3;

    /* Scalar tail.  It starts after the pending pixel so that j-1 is never
     * negative, which also covers rows shorter than 8 bytes. */
    for( j++; j < outLen - 1; j++ )
    {
        outLine[j] = (unsigned char)(( in1[j-1] + in1[j]*2 + in1[j+1]
                                     + in2[j-1]*2 + in2[j]*4 + in2[j+1]*2
                                     + in3[j-1] + in3[j]*2 + in3[j+1] + 8 ) / 16);
    }

    /* The last pixel has no right neighbour and is passed through. */
    outLine[outLen - 1] = in2[outLen - 1];
    return;
}
/*
 * Smooth an image with the 3x3 kernel of smooth_one_line(), streaming the
 * rows through inBuf/outBuf with DMA_FAST1DTO1D.
 *
 * inPtr   Source image.
 * inW     Distance in bytes between consecutive rows.  As in the original
 *         code it is used for the source and for the destination.
 * outPtr  Destination image.
 * outW    Number of bytes per row that are transferred and smoothed.  The
 *         call does nothing unless 1 <= outW <= MAX_W, because every row has
 *         to fit in one MAX_W slot of the staging buffers.
 * outH    Number of rows.  The call does nothing when it is <= 0.
 *
 * The first and the last row are copied unmodified; every row in between is
 * smoothed from itself and its two neighbours.  Images with fewer than three
 * rows are copied through unchanged.
 *
 * NOTE(review): no wait is issued between submitting a transfer and using
 * its data (the original carried a "TODO wait" after the initial loads).
 * Whether DMA_FAST1DTO1D completes before it returns cannot be seen from
 * this file -- confirm against its definition.
 */
void smooth_pingpang( unsigned char* inPtr, short inW, unsigned char* outPtr, short outW,short outH )
{
    short row;
    unsigned char *p1, *p2, *p3, *p4, *pTmp;
    unsigned char *pDone, *pNext;
    unsigned char *pS, *pD;

    if( outW <= 0 || outW > MAX_W || outH <= 0 )
    {
        return;
    }

    pS = inPtr;
    pD = outPtr;

    if( outH < 3 )
    {
        /* No row has both neighbours: pass the rows through the staging
         * buffer unchanged, one slot per row. */
        p1 = inBuf;
        for( row = 0; row < outH; row++ )
        {
            DMA_FAST1DTO1D(p1,pS,outW);
            DMA_FAST1DTO1D(pD,p1,outW);
            p1 += MAX_W;
            pS += inW;
            pD += inW;
        }
        ITG_DMA_FastWait( 63 );
        return;
    }

    /* Four row slots: p1..p3 hold the current 3-row window, p4 is the slot
     * the next row is fetched into. */
    p1 = inBuf;
    p2 = p1+MAX_W;
    p3 = p2+MAX_W;
    p4 = p3+MAX_W;

    /* Prime the window with rows 0, 1 and 2. */
    DMA_FAST1DTO1D(p1,pS,outW);
    pS += inW;
    DMA_FAST1DTO1D(p2,pS,outW);
    pS += inW;
    DMA_FAST1DTO1D(p3,pS,outW);
    pS += inW;

    /* Row 0 is copied as it is. */
    DMA_FAST1DTO1D(pD,p1,outW);
    pD += inW;

    /* Two output slots used alternately: pDone holds the finished row that
     * is stored next, pNext is the slot the following row is smoothed into. */
    pDone = outBuf;
    pNext = pDone+MAX_W;

    /* rows 0,1,2 -> smoothed row 1 */
    smooth_one_line( p1, p2, p3, pDone, outW );

    /* On entry to each pass pDone holds smoothed row `row` and the window
     * holds rows row-1 .. row+1. */
    for( row = 1; row < outH - 2; row++ )
    {
        /* external memory -> free slot: fetch row+2 (only outW bytes, so the
         * slot cannot be overrun) */
        DMA_FAST1DTO1D(p4,pS,outW);
        pS += inW;

        /* finished row -> external memory */
        DMA_FAST1DTO1D(pD,pDone,outW);
        pD += inW;

        /* slide the window down by one row */
        pTmp = p1;
        p1 = p2;
        p2 = p3;
        p3 = p4;
        p4 = pTmp;

        /* rows row, row+1, row+2 -> smoothed row+1, into the other slot */
        smooth_one_line( p1, p2, p3, pNext, outW );

        pTmp = pDone;
        pDone = pNext;
        pNext = pTmp;
    }

    /* Store the last smoothed row (outH-2) ... */
    DMA_FAST1DTO1D(pD,pDone,outW);
    pD += inW;

    /* ... and copy the last row (outH-1) as it is; p3 is the bottom row of
     * the final window. */
    DMA_FAST1DTO1D(pD,p3,outW);

    ITG_DMA_FastWait( 63 );
    return;
}
// Variant of smooth_pingpang() that starts writing at the last output row
// and steps the destination pointer backwards by outW after each row inside
// the loop (judging by the name, an upside-down / vertically flipped output
// -- TODO confirm).
//
// inPtr  : source image, rows inW bytes apart
// inW    : source row pitch in bytes
// outPtr : destination image; rows are addressed outW bytes apart here
// outW   : number of bytes transferred per row
// outH   : number of rows
//
// NOTE(review): this function looks unfinished as shown:
//  - the loop header below is not valid C in this copy of the file (text
//    after "i" appears to be missing), so the iteration count is unknown;
//  - smooth_one_line() is never called, so pout1/pout2 are sent to the
//    destination without being written by this function;
//  - after the loop two transfers target the same pD.
void smooth_pingpang_upsetdown( unsigned char* inPtr, short inW, unsigned char* outPtr, short outW, short outH )
{
// ...
short i,t;
unsigned char *p1, *p2, *p3, *p4,*pTmp;
unsigned char *pout1, *pout2;
unsigned char *pS, *pD;
// ...
// Load the first three source rows into the first three MAX_W slots of inBuf.
p1 = inPtr;
p2 = inBuf;
DMA_FAST1DTO1D(p2,p1,outW);
p2 += MAX_W;
p1 += inW;
DMA_FAST1DTO1D(p2,p1,outW);
p2 += MAX_W;
p1 += inW;
DMA_FAST1DTO1D(p2,p1,outW);
//TODO wait
// p1..p4 name the four row slots of inBuf.
p1 = inBuf;
p2 = p1+MAX_W;
p3 = p2+MAX_W;
p4 = p3+MAX_W;
// Next source row to fetch is row 3.
pS = inPtr+3*inW;
pD = outPtr + outH*outW;// one past the last output row (pitch outW)
// The two MAX_W slots of outBuf.
pout1 = outBuf;
pout2 = pout1+MAX_W;
// p1 --> out first
pD -= outW;// start of the last output row
t = (int)((outH-2)/2);
//TODO process p1,p2,p3 => out 1
// NOTE(review): the loop header on the next line is damaged in this copy.
for( i=0; i2 ) // loop count is meant to leave an even number of rows; if odd, one extra row is copied outside the loop TODO?????
{
// DMA, ext mem -> inBuff2, outW
DMA_FAST1DTO1D(p4,pS,outW);
pS+=inW;
// DMA, out 1 -> ext mem, outW
DMA_FAST1DTO1D(pD,pout1,outW);
pD -= outW;// step to the previous output row
// Rotate the slots: p1..p3 advance by one, the old p1 becomes the fetch slot.
pTmp = p1;
p1 = p2;
p2 = p3;
p3 = p4;
p4 = pTmp;
// process( 1, 2, 3 ) >>out 2   (no call is present here)
// DMA, ext mem -> inBufA
DMA_FAST1DTO1D(p4,pS,outW);
pS += inW;
// DMA, out2 -> ext mem
DMA_FAST1DTO1D(pD,pout2,outW);
pD -= outW;// step to the previous output row
pTmp = p1;
p1 = p2;
p2 = p3;
p3 = p4;
p4 = pTmp;
// process( 1, 2, 3 ), >> out 1   (no call is present here)
}
DMA_FAST1DTO1D(pD,p1,outW);
if(0==((outH-2)%2))
{
// NOTE(review): same destination pD as the transfer just above.
DMA_FAST1DTO1D( pD, p4, outW );// copy the last row
// p1 -> out last 1: copy of the last row
}
else
{
// process the second-to-last row
// p1 -> out last 1: copy of the last row
// copy the last two rows
// NOTE(review): same destination pD as the transfer before the if; pD then
// moves forward by outW, the opposite direction to the loop.
DMA_FAST1DTO1D( pD, p2, outW );
pD+=outW;
DMA_FAST1DTO1D( pD, p3, outW );
}
ITG_DMA_FastWait( 63 );
return;
}