#include <iostream>
#include <opencv2/opencv.hpp>

using namespace cv;
using namespace std;
#define USE_T_API 1/*
* === FUNCTION ======================================================================
* Name: example
* Description:
* =====================================================================================
*/#ifndef USE_T_APIvoid example( Mat & img )
#elsevoid example( UMat & img )
{
#endif#ifndef USE_T_API
Mat _gray, _canny;
#else
UMat _gray, _canny;
#endifdouble t = (double)getTickCount();
cvtColor( img, _gray, COLOR_BGR2GRAY );
GaussianBlur( _gray, _gray, Size(5, 5), 3, 3 );
Canny( _gray, _canny, 10, 100, 3, true );
t = ((double)getTickCount() - t)/getTickFrequency();
cout << "Times passed in seconds: " << t << endl;
namedWindow( "Gray", WINDOW_AUTOSIZE );
namedWindow( "Canny", WINDOW_AUTOSIZE );
imshow( "Gray", _gray );
imshow( "Canny", _canny );
} /* ----- end of function example ----- *//*
* === FUNCTION ======================================================================
* Name: main
* Description:
* =====================================================================================
*/int main ( int argc, char *argv[] )
{
#ifndef USE_T_API
Mat _img = imread( argv[1], -1 );
#else
UMat _img;
imread( argv[1], -1 ).copyTo( _img );
#endifif( _img.empty() ) return -1;
example( _img );
waitKey( 0 );
return0;
} /* ---------- end of function main ---------- */
调查原因,在OpenCV网站上,找到如下对OpenCL模块的说明:
Some tidbits:
1. OpenCL version should be larger than 1.1 with FULL PROFILE.
2. Currently there’s only one OpenCL context and command queue. We hope to implement multi device and multi queue support in the future.
3. Many kernels use 256 as its workgroup size if possible, so the max work group size of the device must larger than 256. All GPU devices we are aware of indeed support 256 workitems in a workgroup, however non GPU devices may not. This will be improved in the future.
4. ...
OpenCV对OpenCL的最低要求是v1.1以上的Full Profile版本,而i.MX6仅支持OpenCL 1.1 EP(Embedded Profile),所以针对i.MX6的OpenCV采用OpenCL加速的调查,可以暂时结束了。
另外,考虑到我是要做实时图像处理,即使支持OpenCL加速,那么OpenCV和OpenCL之间数据传递的延时,也可能不满足要求,这个之后条件成熟了再调查。
更新:
部分OpenCV接口在i.MX6上,可以基于OpenCL 1.1 EP执行,但性能与CPU执行相比更慢,原因不明。