DSP

TI DSP TMS320C66x学习笔记之内联指令(c6x.h中文注释)(六)

2019-07-13 10:22发布

/*****************************************************************************/
/* C6X.H v7.4.12                                                             */
/*****************************************************************************/
#include "vect.h"

/* __float2_t is represented as a double (one 64-bit value). */
typedef double __float2_t;

/* __float2_t aliases for the corresponding double-based intrinsics. */
#define _lltof2          _lltod
#define _f2toll          _dtoll          /* reinterpret a __float2_t as a long long */
#define _ftof2           _ftod
#define _hif2            _hif
#define _lof2            _lof
#define _f2tol           _dtol           /* reinterpret a __float2_t as an __int40_t */
#define _ltof2           _ltod
#define _amem8_f2        _amemd8         /* load/store 8 bytes; pointer must be 8-byte aligned; requires c6x.h */
#define _amem8_f2_const  _amemd8_const   /* load 8 bytes; pointer must be 8-byte aligned; requires c6x.h */
#define _mem8_f2         _memd8          /* load a 64-bit value from memory */
#define _mem8_f2_const   _memd8_const
#define _fdmv_f2         _fdmv
#define _hif2_128        _hid128
#define _lof2_128        _lod128
#define _f2to128         _dto128
#define _fdmvd_f2        _fdmvd

/*
 * Extract the bit field of src2 selected by csta and cstb and sign-extend it
 * to 32 bits. The field is obtained by a left shift followed by a right shift.
 */
int      _ext   (int src2, unsigned csta, unsigned cstb);
/* Same as _ext, but the shift amounts are taken from the low 10 bits of src1. */
int      _extr  (int src2, int src1);
/* Same as _ext, but the result is zero-extended to 32 bits. */
unsigned _extu  (unsigned src2, unsigned csta, unsigned cstb);
/* Same as _extu, but the shift amounts are taken from the low 10 bits of src1. */
unsigned _extur (unsigned src2, int src1);

/* Set to 1 the bit field whose first and last bit are given by csta and cstb. */
unsigned _set   (unsigned src, unsigned csta, unsigned cstb);
/* Set to 1 the bit field of src2 whose first and last bit are given by the low 10 bits of src1. */
unsigned _setr  (unsigned src2, int src1);

/* Clear to 0 the bit field whose first and last bit are given by csta and cstb. */
unsigned _clr   (unsigned src, unsigned csta, unsigned cstb);
/* Clear to 0 the bit field of src2 whose first and last bit are given by the low 10 bits of src1. */
unsigned _clrr  (unsigned src2, int src1);

/* Add src1 and src2 and saturate the result. */
int       _sadd  (int, int);
__int40_t _lsadd (int, __int40_t);

/* Subtract src2 from src1 and saturate the result. */
int       _ssub  (int src1, int src2);
__int40_t _lssub (int, __int40_t);

/* Convert a 40-bit long to a 32-bit signed int, saturating if necessary. */
int _sat (__int40_t);

/* Shift src2 left by src1 bits; the result is saturated to 32 bits. */
int _sshl (int src2, unsigned src1);

/*
 * Add the upper and lower 16-bit halves of src1 to the corresponding halves
 * of src2; the sums go to the upper and lower halves of the result.
 */
int _add2 (int, int);
/*
 * Subtract the upper and lower 16-bit halves of src2 from the corresponding
 * halves of src1; the differences go to the upper and lower halves of the result.
 */
int _sub2 (int, int);

/* Conditional subtract and shift left (commonly used for division). */
unsigned _subc (unsigned, unsigned);

/*
 * Search src2 for a 1 or a 0 (which one is selected by the LSB of src1) and
 * return the number of bits up to the position where the bit value changes.
 */
unsigned _lmbd (unsigned src1, unsigned src2);
/* Return the absolute value of src. */
int       _abs  (int src);
__int40_t _labs (__int40_t src);

/*
 * Return the number of redundant sign bits of the operand. Bit 31 is the sign
 * bit; scanning from bit 31 towards the low bits, 01b returns 0, 001b returns 1,
 * 0001b returns 2, 00001b returns 3.
 */
unsigned _norm  (int);
unsigned _lnorm (__int40_t);

/* 16 LSBs * 16 LSBs */
int      _mpy   (int src1, int src2);       /* multiply src1 by src2; operands are signed by default */
int      _mpyus (unsigned src1, int src2);  /* unsigned src1 times signed src2; the U/S letters in the
                                               name say which operand is unsigned (U) or signed (S) */
int      _mpysu (int, unsigned);
unsigned _mpyu  (unsigned, unsigned);

/* 16 MSBs * 16 MSBs */
int      _mpyh   (int, int);
int      _mpyhus (unsigned, int);
int      _mpyhsu (int, unsigned);
unsigned _mpyhu  (unsigned, unsigned);

/* 16 MSBs * 16 LSBs */
int      _mpyhl   (int, int);
int      _mpyhuls (unsigned, int);
int      _mpyhslu (int, unsigned);

/* 16 LSBs * 16 MSBs */
int      _mpylh   (int, int);
int      _mpyluhs (unsigned, int);
int      _mpylshu (int, unsigned);
unsigned _mpylhu  (unsigned, unsigned);

/*
 * Difference from the multiplies above: the _smpy* family shifts the product
 * left by one bit (with saturation, per the TI instruction reference).
 */
/* Multiply the low 16 bits of src1 by the low 16 bits of src2, then shift left by one bit. */
int _smpy   (int src1, int src2);
int _smpyhl (int, int);   /* high 16 bits * low 16 bits */
int _smpylh (int, int);   /* low 16 bits * high 16 bits */
int _smpyh  (int, int);   /* high 16 bits of src1 * high 16 bits of src2 */

/* Multiply the two pairs of signed 16-bit values in src1 and src2, shift left by 1, then saturate. */
long long _smpy2ll (int, int);

/*
 * Signed 32-bit times signed 32-bit; the 64-bit product is shifted left by 1
 * and saturated, and the upper 32 bits of that result are written to dst.
 */
int _smpy32 (int, int);

/* Return the high 32 bits (odd register) of a double register pair as an integer. */
unsigned _hi   (double);
/* Return the high 32 bits (odd register) of a double register pair as a float. */
float    _hif  (double);
/* Return the high 32 bits (odd register) of a long long register pair as an integer. */
unsigned _hill (long long);
/* Return the low 32 bits (even register) of a double register pair as an integer. */
unsigned _lo   (double);
/* Return the low 32 bits (even register) of a double register pair as a float. */
float    _lof  (double);
/* Return the low 32 bits (even register) of a long long register pair as an integer. */
unsigned _loll (long long);

/* Build a double register pair from two unsigned ints: src2 is the high (odd) register, src1 the low (even) register. */
double    _itod  (unsigned, unsigned);
/* Build a double register pair from two floats: src2 is the high (odd) register, src1 the low (even) register. */
double    _ftod  (float, float);
/* Build a long long register pair from two unsigned ints: src2 is the high (odd) register, src1 the low (even) register. */
long long _itoll (unsigned src2, unsigned src1);

/* Note: the following are not value conversions; the register bits are reinterpreted as integer or floating point. */
float     _itof  (unsigned);    /* reinterpret an int register as a float */
unsigned  _ftoi  (float);       /* reinterpret the bits of a float as an unsigned int */
__int40_t _dtol  (double);      /* reinterpret a double register pair as an __int40_t */
double    _ltod  (__int40_t);   /* reinterpret an __int40_t register pair as a double */
long long _dtoll (double);      /* reinterpret a double register pair as a long long */
double    _lltod (long long);   /* reinterpret a long long register pair as a double */

/* Define pseudo intrinsics for some pseudo instructions */
#ifndef _cmplt2
#define _cmplt2(src1, src2) _cmpgt2((src2), (src1))
#endif
#ifndef _cmpltu4
#define _cmpltu4(src1, src2) _cmpgtu4((src2), (src1))
#endif
#ifndef _dotpnrus2
#define _dotpnrus2(src1, src2) _dotpnrsu2((src2), (src1))
#endif
#ifndef _dotpus4
#define _dotpus4(src1, src2) _dotpsu4((src2), (src1))
#endif
#ifndef _mpyihll
#define _mpyihll(src1, src2) _mpyhill((src2), (src1))
#endif
#ifndef _mpyihr
#define _mpyihr(src1, src2) _mpyhir((src2), (src1))
#endif
#ifndef _mpyilll
#define _mpyilll(src1, src2) _mpylill((src2), (src1))
#endif
#ifndef _mpyilr
#define _mpyilr(src1, src2) _mpylir((src2), (src1))
#endif
#ifndef _mpyus4ll
#define _mpyus4ll(src1, src2) _mpysu4ll((src2), (src1))
#endif
#ifndef _saddsu2
#define _saddsu2(src1, src2) _saddus2((src2), (src1))
#endif
#ifndef _swap2
#define _swap2(src) _packlh2((src), (src))
#endif

/* Add the four 8-bit pairs of src1 and src2 without saturation; a carry does not affect the other 8-bit lanes. */
int _add4 (int, int);

/* Average of two pairs of signed 16-bit values. */
int _avg2 (int, int);

/* Average of four pairs of unsigned 8-bit values. */
unsigned _avgu4 (unsigned, unsigned);

/* Compare two pairs of 16-bit values for equality; results go to the 2 LSBs of dst, 1 meaning equal. */
int _cmpeq2 (int, int);

/* Compare four pairs of 8-bit values for equality; results go to the 4 LSBs of dst, 1 if equal, otherwise 0. */
int _cmpeq4 (int, int);

/* Compare two pairs of signed 16-bit values; 1 if src1 > src2, otherwise 0. Results go to the 2 LSBs of dst. */
int _cmpgt2 (int src1, int src2);

/* Compare four pairs of unsigned 8-bit values; 1 if src1 > src2, otherwise 0. Results go to the 4 LSBs of dst. */
unsigned _cmpgtu4 (unsigned src1, unsigned src2);

/*
 * Dot product (products summed) of the two pairs of signed 16-bit values in
 * src1 and src2; the result is written as a signed 32-bit int (_dotp2) or
 * sign-extended to the wider return type (_ldotp2).
 */
int       _dotp2  (int , int);
__int40_t _ldotp2 (int, int);

/* Dot product with subtraction of the signed 16-bit values in src1 and src2. */
int _dotpn2 (int, int);

/*
 * Product of the upper 16-bit halves MINUS product of the lower 16-bit halves.
 * Values in src1 are treated as signed, values in src2 as unsigned; 2^15 is
 * added and the result is arithmetically shifted right by 16.
 */
int _dotpnrsu2 (int src1, unsigned src2);

/*
 * Product of the upper 16-bit halves PLUS product of the lower 16-bit halves.
 * Values in src1 are treated as signed, values in src2 as unsigned; 2^15 is
 * added and the result is arithmetically shifted right by 16.
 */
int _dotprsu2 (int, unsigned);

/* Multiply the four 8-bit pairs of src1 and src2 and sum; src1 bytes are signed, src2 bytes are unsigned. */
int      _dotpsu4 (int, unsigned);
/* Same as _dotpsu4, but all bytes are treated as unsigned. */
unsigned _dotpu4  (unsigned, unsigned);

/* Galois-field multiply of the four pairs of unsigned 8-bit values in src1 and src2. */
int _gmpy4 (int, int);

/* Per-lane maximum of the two pairs of signed 16-bit values in src1 and src2. */
int      _max2  (int, int);
/* Per-lane maximum of the four pairs of unsigned 8-bit values in src1 and src2. */
unsigned _maxu4 (unsigned, unsigned);
/* Per-lane minimum of the two pairs of signed 16-bit values in src1 and src2. */
int      _min2  (int, int);
/* Per-lane minimum of the four pairs of unsigned 8-bit values in src1 and src2. */
unsigned _minu4 (unsigned, unsigned);

/* Multiply the two pairs of signed 16-bit values; the two 32-bit products are written to a long long. */
long long _mpy2ll (int, int);

/* Upper 16 bits of src1 (signed 16-bit) times signed 32-bit src2; the result occupies the low 48 bits of the long long. */
long long _mpyhill (int src1, int src2);
/* Lower 16 bits of src1 (signed 16-bit) times signed 32-bit src2; the result occupies the low 48 bits of the long long. */
long long _mpylill (int, int);

/*
 * Upper 16 bits of src1 (signed 16-bit) times signed 32-bit src2. The product
 * is rounded to 32 bits by adding 2^14 and then shifting right by 15.
 */
int _mpyhir (int, int);
/*
 * Lower 16 bits of src1 (signed 16-bit) times signed 32-bit src2. The product
 * is rounded to 32 bits by adding 2^14 and then shifting right by 15.
 */
int _mpylir (int, int);

/*
 * Four signed 8-bit values of src1 times four unsigned 8-bit values of src2,
 * giving four signed 16-bit products packed into 64 bits.
 */
long long _mpysu4ll (int src1, unsigned src2);
/* Same as _mpysu4ll, but all values are unsigned. */
long long _mpyu4ll  (unsigned, unsigned);

/* Lower 16 bits of src1 go to the upper half of the result; lower 16 bits of src2 go to the lower half. */
unsigned _pack2  (unsigned src1, unsigned src2);
/* Upper 16 bits of src1 go to the upper half of the result; upper 16 bits of src2 go to the lower half. */
unsigned _packh2 (unsigned, unsigned);
/* With src1 = a_3|a_2|a_1|a_0 and src2 = b_3|b_2|b_1|b_0 (unsigned bytes), returns a_3|a_1|b_3|b_1. */
unsigned _packh4 (unsigned, unsigned);
/* With src1 = a_3|a_2|a_1|a_0 and src2 = b_3|b_2|b_1|b_0 (unsigned bytes), returns a_2|a_0|b_2|b_0. */
unsigned _packl4 (unsigned, unsigned);
/* With src1 = a_hi|a_lo and src2 = b_hi|b_lo (unsigned halfwords), returns a_hi|b_lo. */
unsigned _packhl2 (unsigned, unsigned);
/* With src1 = a_hi|a_lo and src2 = b_hi|b_lo (unsigned halfwords), returns a_lo|b_hi. */
unsigned _packlh2 (unsigned, unsigned);

/*
 * Rotate the 32-bit value src2 left by the amount in the 5 LSBs of src1; bits
 * 5-31 of src1 are ignored.
 * NOTE(review): src1/src2 here follow the ROTL instruction's operand naming;
 * confirm against the compiler guide which C argument carries the data.
 */
unsigned _rotl (unsigned src1, unsigned src2);

/* Add the two pairs of signed 16-bit values, producing two signed 16-bit results with saturation. */
int      _sadd2   (int, int);
/* Add the four pairs of unsigned 8-bit values with saturation. */
unsigned _saddu4  (unsigned, unsigned);
/*
 * Add the two unsigned 16-bit values of src1 to the two signed 16-bit values
 * of src2, producing two unsigned 16-bit results with saturation.
 */
int      _saddus2 (unsigned src1, int src2);

/* Shift src2 left by one byte and fill the vacated byte with the most-significant byte of src1. */
unsigned _shlmb (unsigned src1, unsigned src2);
/* Shift src2 right by one byte and fill the vacated byte with the least-significant byte of src1. */
unsigned _shrmb (unsigned src1, unsigned src2);

/*
 * Shift each of the two signed 16-bit values right by the same amount (taken
 * from the low 5 bits of the count operand); vacated bits are sign-extended.
 * NOTE(review): the original annotation names the data operand src2 and the
 * count src1 (instruction operand order); the parameter types suggest the C
 * prototype takes the data first and the count second - confirm.
 */
int      _shr2  (int src1, unsigned src2);
/* Same as _shr2 for unsigned 16-bit values; vacated bits are filled with 0. Same NOTE(review) applies. */
unsigned _shru2 (unsigned src1, unsigned src2);

/*
 * Saturate the signed 32-bit values src1 and src2 to signed 16 bits; the
 * saturated src1 goes to the upper 16 bits of dst and the saturated src2 to
 * the lower 16 bits.
 */
int      _spack2  (int src1, int src2);
/* Saturate the two pairs of signed 16-bit values in src1 and src2 to unsigned 8-bit values. */
unsigned _spacku4 (int, int);

/*
 * Shift the signed 32-bit value src2 left or right by the number of bits
 * given by src1, which lies in [-31, 31]. If src1 is positive, src2 is
 * shifted left; if src1 is negative, src2 is shifted right by |src1| with
 * sign extension.
 */
int _sshvl (int src2, int src1);

/*
 * Shift the signed 32-bit value src2 left or right by the number of bits
 * given by src1, which lies in [-31, 31]. If src1 is positive, src2 is
 * shifted right with sign extension; if src1 is negative, src2 is shifted
 * left by |src1|.
 */
int _sshvr (int src2, int src1);

/* Subtract the four 8-bit pairs of src1 and src2 without saturation. */
int _sub4    (int src1, int src2);
/* Absolute value of the differences of the four pairs of unsigned 8-bit values. */
int _subabs4 (int src1, int src2);

/* Absolute value of the upper and lower 16-bit halves, computed separately. */
int _abs2 (int);

/* Count the 1 bits in each of the four 8-bit fields; each count is written to the corresponding field of the result. */
unsigned _bitc4 (unsigned);

/* Reverse the order of the bits. */
unsigned _bitr (unsigned);

/* De-interleave the bits of src: even bits go to the lower 16 bits, odd bits to the upper 16 bits. */
unsigned _deal (unsigned);

/* Move src to the return value using the multiplier pipeline (4-cycle latency). */
int _mvd (int src2);

/*
 * Interleave the upper and lower 16 bits of src2:
 * abcdefghijklmnop|ABCDEFGHIJKLMNOP -> aAbBcCdDeEfFgGhH|iIjJkKlLmMnNoOpP
 */
unsigned _shfl (unsigned src2);

/* Swap bytes within each halfword: (ub_3|ub_2|ub_1|ub_0) -> (ub_2|ub_3|ub_0|ub_1). */
unsigned _swap4 (unsigned);

/* Zero-extend the two bytes of the upper halfword: (ub_3|ub_2|ub_1|ub_0) -> (0|ub_3|0|ub_2); each field is one byte. */
unsigned _unpkhu4 (unsigned);
/* Zero-extend the two bytes of the lower halfword: (ub_3|ub_2|ub_1|ub_0) -> (0|ub_1|0|ub_0); each field is one byte. */
unsigned _unpklu4 (unsigned);

/* Expand the 2 LSBs of src: bit 1 fills the upper 16 bits, bit 0 fills the lower 16 bits. */
unsigned _xpnd2 (unsigned);
/* Same idea using the 4 LSBs of src, one per byte; e.g. bit 0 fills the lowest 8 bits. */
unsigned _xpnd4 (unsigned);

/* Two operations in parallel: src1 + src2 -> dst_o and src1 - src2 -> dst_e (dst_o = odd register, dst_e = even register). */
long long _addsub  (int src1, int src2);
/* Same as _addsub, performed as ADD2 and SUB2 on two pairs of signed 16-bit values. */
long long _addsub2 (unsigned, unsigned);
/*
 * Complex multiply of packed signed 16-bit values:
 *   sat((lsb16(src1) x msb16(src2)) + (msb16(src1) x lsb16(src2))) -> dst_e
 *   (msb16(src1) x msb16(src2)) - (lsb16(src1) x lsb16(src2))      -> dst_o
 */
long long _cmpy (unsigned src1, unsigned src2);

/*
 *   sat((lsb16(src1) x msb16(src2)) + (msb16(src1) x lsb16(src2))) -> tmp_e
 *   msb16(sat(tmp_e + 00008000h))                                  -> lsb16(dst)
 *   sat((msb16(src1) x msb16(src2)) - (lsb16(src1) x lsb16(src2))) -> tmp_o
 *   msb16(sat(tmp_o + 00008000h))                                  -> msb16(dst)
 */
unsigned _cmpyr (unsigned, unsigned);

/*
 *   sat((lsb16(src1) x msb16(src2)) + (msb16(src1) x lsb16(src2))) -> tmp_e
 *   msb16(sat((tmp_e + 00004000h) << 1))                           -> lsb16(dst)
 *   sat((msb16(src1) x msb16(src2)) - (lsb16(src1) x lsb16(src2))) -> tmp_o
 *   msb16(sat((tmp_o + 00004000h) << 1))                           -> msb16(dst)
 */
unsigned _cmpyr1 (unsigned, unsigned);

/*
 * Dot-product-and-sum operations; the original notes refer to the figure on
 * p.35 of the TMS320C6000 intrinsics/assembly reference for the data flow.
 */
long long _ddotph2  (long long, unsigned);
unsigned  _ddotph2r (long long, unsigned);
long long _ddotpl2  (long long, unsigned);
unsigned  _ddotpl2r (long long, unsigned);
long long _ddotp4   (unsigned src1, unsigned src2);

/* Data packing; the original notes refer to the figure on p.37 of the same reference. */
long long _dpack2  (unsigned src1, unsigned src2);
long long _dpackx2 (unsigned, unsigned);

/* Move two registers into one register pair in a single operation. */
long long _dmv  (unsigned, unsigned);
double    _fdmv (float, float);

/* Galois-field multiply. */
unsigned _gmpy (unsigned, unsigned);

/* Signed 32-bit times signed 32-bit; the full 64-bit result is written to dst. */
long long _mpy32ll (int, int);
/* Signed 32-bit times signed 32-bit; the low 32 bits of the 64-bit result are written to dst. */
int       _mpy32   (int, int);
/* Signed 32-bit src1 times unsigned 32-bit src2, giving a signed 64-bit result. */
long long _mpy32su (int, unsigned);
/* Same as _mpy32su with the operand signedness swapped: unsigned times signed. */
long long _mpy32us (unsigned, int);
/* Same as _mpy32su, but both operands are unsigned. */
long long _mpy32u  (unsigned, unsigned);

/*
 * 16-bit by 32-bit multiplies. The upper and lower 16 bits of src1 are treated
 * as signed 16-bit values and src2 as a signed 32-bit value. Each product is
 * rounded to 32 bits by adding 2^14 and shifting right by 15; the low 32 bits
 * of the two results are written to dst_o:dst_e.
 */
long long _mpy2ir (unsigned src1, int src2);

/*
 * src1 and src2 are each shifted left by 1 with saturation; the upper 16 bits
 * of the two results are packed into the 32-bit return value.
 */
unsigned _rpack2 (unsigned src1, unsigned src2);

/* Two operations in parallel: sat(src1 + src2) -> dst_o and sat(src1 - src2) -> dst_e. */
long long _saddsub  (int, int);
/* SADD2 and SSUB2 performed in parallel. */
long long _saddsub2 (unsigned, unsigned);

/*
 * The original annotator did not describe this one and points to p.39 of the
 * reference. NOTE(review): per the SHFL3 instruction name this is presumably a
 * three-way bit interleave - confirm against the instruction set guide.
 */
long long _shfl3 (unsigned, unsigned);

/* Subtract the two signed 16-bit values of src2 from the two signed 16-bit values of src1. */
int _ssub2 (int src1, int src2);

/* Galois-field multiply. */
unsigned _xormpy (unsigned, unsigned);

long long _dcmpyr1   (long long, long long);
long long _dccmpyr1  (long long, long long);
long long _cmpy32r1  (long long, long long);
long long _ccmpy32r1 (long long, long long);
long long _mpyu2     (unsigned, unsigned);

/* Dot product (products summed) of four pairs of signed 16-bit values. */
int       _dotp4h   (long long, long long);
long long _dotp4hll (long long, long long);

/* Four signed 16-bit values in src1 and four unsigned 16-bit values in src2, giving a 32-bit dot-product sum. */
int       _dotpsu4h   (long long, long long);
/* Four signed 16-bit values in src1 and four unsigned 16-bit values in src2, giving a 64-bit dot-product sum. */
long long _dotpsu4hll (long long, long long);

/* Add the two signed 32-bit values of src1 to the two signed 32-bit values of src2. */
long long _dadd   (long long src1, long long src2);
long long _dadd_c (int, long long);
long long _dsadd  (long long, long long);

/* Add four pairs of signed 16-bit values. */
long long _dadd2  (long long, long long);
long long _dsadd2 (long long, long long);

long long _dsub   (long long, long long);
long long _dssub  (long long, long long);
long long _dssub2 (long long, long long);

long long _dapys2 (long long, long long);

long long _dshr   (long long, unsigned);
long long _dshru  (long long, unsigned);
long long _dshl   (long long, unsigned);

long long _dshr2  (long long, unsigned);
long long _dshru2 (long long, unsigned);
unsigned  _shl2   (unsigned , unsigned);
long long _dshl2  (long long, unsigned);

long long _dxpnd4 (unsigned);
long long _dxpnd2 (unsigned);

int       _crot90   (int);
long long _dcrot90  (long long);
int       _crot270  (int);
long long _dcrot270 (long long);

/* Compare the four pairs of signed 16-bit values in src1 and src2; the larger of each pair goes to dst. */
long long _dmax2 (long long, long long);
long long _dmin2 (long long, long long);

/* Compare the eight pairs of unsigned 8-bit values in src1 and src2; the larger of each pair goes to dst. */
long long _dmaxu4 (long long, long long);
long long _dminu4 (long long, long long);

/* Compare four pairs of 16-bit values; 1 if equal, 0 if not. */
unsigned _dcmpeq2 (long long, long long);
/* Compare eight pairs of 8-bit values; 1 if equal, 0 if not. */
unsigned _dcmpeq4 (long long, long long);
/* Compare four pairs of 16-bit values; 1 if greater, 0 if not greater. */
unsigned _dcmpgt2 (long long, long long);
/* Compare eight pairs of 8-bit values; 1 if greater, 0 if not greater. */
unsigned _dcmpgtu4 (long long, long long);

/* Four averages of four pairs of signed 16-bit values, computed as "(a+b+1)/2". */
long long _davg2 (long long, long long);
/* Eight averages of eight pairs of unsigned 8-bit values. */
long long _davgu4 (long long, long long);
/* Signed 16-bit, no rounding: four averages computed as "(a+b)/2". */
long long _davgnr2 (long long, long long);
/* Unsigned 8-bit, no rounding: eight averages. */
long long _davgnru4 (long long, long long);

long long _unpkbu4 (unsigned);
long long _unpkh2 (unsigned);
long long _unpkhu2 (unsigned);

/* Two PACKL2 operations performed in parallel. */
long long _dpackl2 (long long, long long);
/* Two PACKH2 operations performed in parallel. */
long long _dpackh2 (long long, long long);
long long _dpackhl2 (long long, long long);
/* PACKH4 and PACKL4 performed in parallel. */
long long _dpacklh4 (unsigned, unsigned);
long long _dpackl4 (long long, long long);
long long _dpackh4 (long long, long long);
long long _dspacku4 (long long, long long);

void _mfence ();

__float2_t _dmpysp (__float2_t, __float2_t);
/* Two-way float addition. */
__float2_t _daddsp (__float2_t, __float2_t);
__float2_t _dsubsp (__float2_t, __float2_t);

/* Convert the signed 16-bit values in src to single-precision floats placed in dst_e and dst_o. */
__float2_t _dinthsp (unsigned src);
/* Convert the unsigned 16-bit values in src to single-precision floats placed in dst_e and dst_o. */
__float2_t _dinthspu (unsigned);
/* Convert the signed 32-bit values in src to single-precision floats placed in dst_e and dst_o. */
__float2_t _dintsp (long long);
/* Convert the unsigned 32-bit values in src to single-precision floats placed in dst_e and dst_o. */
__float2_t _dintspu (long long);
unsigned _dspinth (__float2_t);
long long _dspint (__float2_t);

int _land (int, int);
int _landn (int, int);
int _lor (int, int);

/*
 * Move two registers into one register pair, doing both moves at once. Useful
 * when handling many double words; relieves register pressure.
 */
long long _dmvd (int, int);
double _fdmvd (float, float);

double _complex_mpysp (double, double); /* CMPYSP then DADDSP */
double _complex_conjugate_mpysp (double, double); /* CMPYSP then DSUBSP */

long long _xorll_c (int, long long);

/* Intrinsics that take or return the 128-bit __x128_t container type. */
__x128_t __BUILTIN _dcmpy (long long, long long);
__x128_t __BUILTIN _dccmpy (long long, long long);
long long __BUILTIN _cmatmpyr1 (long long, __x128_t);
long long __BUILTIN _ccmatmpyr1 (long long, __x128_t);
__x128_t __BUILTIN _cmatmpy (long long, __x128_t);
__x128_t __BUILTIN _ccmatmpy (long long, __x128_t); __x128_t __BUILTIN _qsmpy32r1 (__x128_t, __x128_t); __x128_t __BUILTIN _qmpy32 (__x128_t, __x128_t); __x128_t __BUILTIN _dsmpy2 (long long, long long); /*4对16位有符号数相乘,得到32位有符号数放入128位寄存器中*/ __x128_t __BUILTIN _dmpy2 (long long, long long); /*4对16位有符号数相乘,得到32位有符号数放入128位寄存器中*/ __x128_t __BUILTIN _dmpyu2 (long long, long long); /*将src1中8个8位有符号数乘以src2中8个8位无符号数,得到8个16位有符号数*/ __x128_t __BUILTIN _dmpysu4 (long long src1, long long src2); __x128_t __BUILTIN _dmpyu4 (long long, long long);//同上,都是无符号 __x128_t __BUILTIN _cmpysp (__float2_t, __float2_t); __x128_t __BUILTIN _qmpysp (__x128_t, __x128_t); /*执行2个dotp4h,都是有符号的*/ long long __BUILTIN _ddotp4h (__x128_t, __x128_t); /*执行2个dotpsu4h,一个有符号,一个无符号*/ long long __BUILTIN _ddotpsu4h (__x128_t, __x128_t); __x128_t __BUILTIN _ito128 (unsigned, unsigned, unsigned, unsigned); __x128_t __BUILTIN _fto128 (float, float, float, float); __x128_t __BUILTIN _llto128 (long long, long long); __x128_t __BUILTIN _dto128 (double, double); long long __BUILTIN _hi128 (__x128_t); double __BUILTIN _hid128 (__x128_t); long long __BUILTIN _lo128 (__x128_t); double __BUILTIN _lod128 (__x128_t); unsigned __BUILTIN _get32_128 (__x128_t, __CONST(0,3) unsigned); float __BUILTIN _get32f_128 (__x128_t, __CONST(0,3) unsigned); __x128_t __BUILTIN _dup32_128 (unsigned); extern __cregister volatile unsigned int AMR; extern __cregister volatile unsigned int CSR; extern __cregister volatile unsigned int IFR; extern __cregister volatile unsigned int ISR; extern __cregister volatile unsigned int ICR; extern __cregister volatile unsigned int IER; extern __cregister volatile unsigned int ISTP; extern __cregister volatile unsigned int IRP; extern __cregister volatile unsigned int NRP; extern __cregister volatile unsigned int GFPGFR; extern __cregister volatile unsigned int DIER; extern __cregister volatile unsigned int REP; extern __cregister volatile unsigned int TSCL; extern __cregister volatile unsigned int TSCH; 
/* Further control registers, each declared as an extern __cregister volatile unsigned int. */
extern __cregister volatile unsigned int ARP;
extern __cregister volatile unsigned int ILC;
extern __cregister volatile unsigned int RILC;
extern __cregister volatile unsigned int PCE1;
extern __cregister volatile unsigned int DNUM;
extern __cregister volatile unsigned int SSR;
extern __cregister volatile unsigned int GPLYA;
extern __cregister volatile unsigned int GPLYB;
extern __cregister volatile unsigned int TSR;
extern __cregister volatile unsigned int ITSR;
extern __cregister volatile unsigned int NTSR;
extern __cregister volatile unsigned int ECR;
extern __cregister volatile unsigned int EFR;
extern __cregister volatile unsigned int IERR;
extern __cregister volatile unsigned int DMSG;
extern __cregister volatile unsigned int CMSG;
extern __cregister volatile unsigned int DT_DMA_ADDR;
extern __cregister volatile unsigned int DT_DMA_DATA;
extern __cregister volatile unsigned int DT_DMA_CNTL;
extern __cregister volatile unsigned int TCU_CNTL;
extern __cregister volatile unsigned int RTDX_REC_CNTL;
extern __cregister volatile unsigned int RTDX_XMT_CNTL;
extern __cregister volatile unsigned int RTDX_CFG;
extern __cregister volatile unsigned int RTDX_RDATA;
extern __cregister volatile unsigned int RTDX_WDATA;
extern __cregister volatile unsigned int RTDX_RADDR;
extern __cregister volatile unsigned int RTDX_WADDR;
extern __cregister volatile unsigned int MFREG0;
extern __cregister volatile unsigned int DBG_STAT;
extern __cregister volatile unsigned int BRK_EN;
extern __cregister volatile unsigned int HWBP0_CNT;
extern __cregister volatile unsigned int HWBP0;
extern __cregister volatile unsigned int HWBP1;
extern __cregister volatile unsigned int HWBP2;
extern __cregister volatile unsigned int HWBP3;
extern __cregister volatile unsigned int OVERLAY;
extern __cregister volatile unsigned int PC_PROF;
extern __cregister volatile unsigned int ATSR;
extern __cregister volatile unsigned int TRR;
extern __cregister volatile unsigned int TCRR;