7.1 ipu_device.c Analysis (Part 1) --- Flow Analysis
In ipu_common.c, the ipu_probe function ends by calling register_ipu_device. That function lives in ipu_device.c, so the analysis starts from this file.
This file mainly implements the execution flow of two kernel threads. According to FTF-CON-F0119.pdf:
(1) Each IPU has two kernel threads for the IC tasks (PP & VF).
(2) A task is executed by a kernel thread once it has been added to that thread's task queue list.
(3) The execution flow of each task is: ipu_init_channel ---> ipu_init_channel_buffer ---> request_ipu_irq ---> ipu_enable_channel ---> wait_irq (task finish) ---> ipu_disable_channel ---> ipu_uninit_channel.
From my preliminary reading, some of these function names have since changed in the source; I keep them as written here for now and will pin them down in the detailed analysis later.
(4) Tasks are based on single-buffer mode.
(5) An application only needs to prepare one task and add it to the queue.
(6) Task operations consist of:
setting the task's input/output/overlay/rotation/deinterlacing/buffer;
calling the IPU_CHECK_TASK ioctl first, then the IPU_QUEUE_TASK ioctl to add the task to the queue.
In the ipu_gen_init function of ipu_common.c, platform_driver_register(&mxcipu_driver); registers the mxcipu_driver structure (of type platform_driver) on the platform bus. It is registered as the driver side, and it matches devices named ipu*. When a matching device is also registered on this bus, the platform core runs the corresponding match function and eventually calls the probe function inside the mxcipu_driver structure.
Similarly, in mxc_v4l2_capture.c, platform_driver_register(&mxc_v4l2_driver); registers the mxc_v4l2_driver structure as the driver side on the platform bus; that driver matches devices named v4l2_cap_*.
So for now it is not obvious how the two relate; both are registered as drivers on the platform bus. A typical application operates on the "/dev/video0" device, which is the v4l2_cap_* device registered by mxc_v4l2_capture.c; that device supports many of the V4L2 ioctl operations.
To drive the device registered via ipu_common.c, an application instead operates on "/dev/mxc_ipu". This device exposes only a handful of ioctl operations, such as IPU_CHECK_TASK and IPU_QUEUE_TASK.
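As a rough illustration of this user-space flow, here is a minimal sketch. The struct ipu_task layout and the IPU_CHECK_TASK / IPU_QUEUE_TASK ioctls come from the i.MX linux/ipu.h UAPI header; the frame sizes, pixel formats, and the way the physical buffer addresses are obtained (for example via the driver's IPU_ALLOC ioctl) are simplified assumptions for this sketch:

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ipu.h>  /* struct ipu_task, IPU_CHECK_TASK, IPU_QUEUE_TASK */

/* in_paddr/out_paddr: physical addresses of pre-allocated frame buffers */
int run_one_ipu_task(unsigned long in_paddr, unsigned long out_paddr)
{
    struct ipu_task task;
    int fd, ret;

    memset(&task, 0, sizeof(task));
    task.input.width   = 1920;                 /* example values only */
    task.input.height  = 1080;
    task.input.format  = IPU_PIX_FMT_YUV420P;
    task.input.paddr   = in_paddr;
    task.output.width  = 1024;
    task.output.height = 768;
    task.output.format = IPU_PIX_FMT_RGB565;
    task.output.paddr  = out_paddr;

    fd = open("/dev/mxc_ipu", O_RDWR);
    if (fd < 0)
        return -1;
    /* ask the driver to validate (and possibly adjust) the task first */
    ret = ioctl(fd, IPU_CHECK_TASK, &task);
    if (ret != IPU_CHECK_OK) {          /* warnings also land here */
        fprintf(stderr, "IPU_CHECK_TASK returned 0x%x\n", ret);
        close(fd);
        return -1;
    }
    /* queue the task; this returns once the kernel threads have run it */
    ret = ioctl(fd, IPU_QUEUE_TASK, &task);
    close(fd);
    return ret;
}

The rest of this article is about what happens on the kernel side once IPU_QUEUE_TASK has been issued.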
(I) The register_ipu_device function
int register_ipu_device(struct ipu_soc *ipu, int id)
{
int ret = 0;
static int idx;
static struct ipu_thread_data thread_data[5];
if (!major) {
major = register_chrdev(0, "mxc_ipu", &mxc_ipu_fops);
if (major < 0) {
printk(KERN_ERR "Unable to register mxc_ipu as a char device\n");
ret = major;
goto register_cdev_fail;
}
/* Register the char device; essentially this hooks the mxc_ipu_fops structure into the kernel. */
ipu_class = class_create(THIS_MODULE, "mxc_ipu");
if (IS_ERR(ipu_class)) {
ret = PTR_ERR(ipu_class);
goto ipu_class_fail;
}
/* Create the class. */
ipu_dev = device_create(ipu_class, NULL, MKDEV(major, 0),
NULL, "mxc_ipu");
if (IS_ERR(ipu_dev)) {
ret = PTR_ERR(ipu_dev);
goto dev_create_fail;
}
/* Create the device node. */
ipu_dev->dma_mask = kmalloc(sizeof(*ipu_dev->dma_mask), GFP_KERNEL);
*ipu_dev->dma_mask = DMA_BIT_MASK(32);
ipu_dev->coherent_dma_mask = DMA_BIT_MASK(32);
/* Allocate memory for the dma_mask member of struct device *ipu_dev, then set it. */
mutex_init(&ipu_ch_tbl.lock);
}
max_ipu_no = ++id;
ipu->rot_dma[0].size = 0;
ipu->rot_dma[1].size = 0;
/* max_ipu_no is set from the id argument passed in by ipu_probe in ipu_common.c (not analyzed yet); roughly, it records the number of IPUs registered. */
thread_data[idx].ipu = ipu;
thread_data[idx].id = 0;
thread_data[idx].is_vdoa = 0;
ipu->thread[0] = kthread_run(ipu_task_thread, &thread_data[idx++],
"ipu%d_task", id);
if (IS_ERR(ipu->thread[0])) {
ret = PTR_ERR(ipu->thread[0]);
goto kthread0_fail;
}
/* idx is a function-local static variable. Only the declaration static int idx; appears above, with no explicit initializer, but this is not a bug: C guarantees that static storage is zero-initialized, so idx starts at 0. Here it selects thread_data[0], whose ipu, id and is_vdoa members are set. */
/* The ipu_soc structure contains the member struct task_struct *thread[2]; so each IPU gets two kernel threads. kthread_run is called here to create and start the first one. For background on kernel threads, see 《Linux内核kthread_run函数理解学习》 and 《运行不息的内核线程kthread》. */
thread_data[idx].ipu = ipu;
thread_data[idx].id = 1;
thread_data[idx].is_vdoa = 0;
ipu->thread[1] = kthread_run(ipu_task_thread, &thread_data[idx++],
"ipu%d_task", id);
if (IS_ERR(ipu->thread[1])) {
ret = PTR_ERR(ipu->thread[1]);
goto kthread1_fail;
}
/* Create and start the second kernel thread here. Both threads run the same function, ipu_task_thread. */
return ret;
kthread1_fail:
kthread_stop(ipu->thread[0]);
kthread0_fail:
if (id == 0)
device_destroy(ipu_class, MKDEV(major, 0));
dev_create_fail:
if (id == 0) {
class_destroy(ipu_class);
}
ipu_class_fail:
if (id == 0)
unregister_chrdev(major, "mxc_ipu");
register_cdev_fail:
return ret;
}
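For readers as unfamiliar with the kthread API as I was, the following minimal, self-contained sketch shows the lifecycle that register_ipu_device relies on: kthread_run creates and immediately wakes the thread, the thread function loops until kthread_should_stop returns true, and kthread_stop asks it to exit (the same pattern used in the error path above). The demo_* names are made up for illustration:

#include <linux/delay.h>
#include <linux/err.h>
#include <linux/kthread.h>

static struct task_struct *demo_thread;

static int demo_fn(void *data)
{
    /* loop until someone calls kthread_stop() on this thread */
    while (!kthread_should_stop()) {
        /* ... wait for work, e.g. with wait_event_interruptible() ... */
        msleep(100);
    }
    return 0;
}

static int demo_start(void)
{
    demo_thread = kthread_run(demo_fn, NULL, "demo%d", 0);
    if (IS_ERR(demo_thread))
        return PTR_ERR(demo_thread);  /* same error pattern as above */
    return 0;
}

static void demo_exit(void)
{
    kthread_stop(demo_thread);  /* blocks until demo_fn returns */
}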
(II) Next, into the ipu_task_thread function:
static int ipu_task_thread(void *argv)
{
struct ipu_task_entry *tsk;
struct ipu_task_entry *sp_tsk0;
struct ipu_split_task sp_task[4];
/* priority lower than irq_thread */
const struct sched_param param = {
.sched_priority = MAX_USER_RT_PRIO/2 - 1,
};
int ret;
int curr_thread_id;
uint32_t size;
unsigned long flags;
unsigned int cpu;
struct cpumask cpu_mask;
struct ipu_thread_data *data = (struct ipu_thread_data *)argv;
thread_id++;
curr_thread_id = thread_id;
sched_setscheduler(current, SCHED_FIFO, &param);
/* For sched_setscheduler, see http://blog.csdn.net/allwtg/article/details/5254306 (usage of sched_setscheduler). */
if (!data->is_vdoa) {
cpu = cpumask_first(cpu_online_mask);
cpumask_set_cpu(cpu, &cpu_mask);
/* For the cpumask helpers, see http://blog.csdn.net/nirenxiaoxiao/article/details/21462053 (Linux cpumask analysis). */
ret = sched_setaffinity(data->ipu->thread[data->id]->pid,
&cpu_mask);
/* For CPU affinity and the sched_setaffinity()/sched_getaffinity() pair, see http://www.cnblogs.com/visayafan/archive/2011/12/10/2283375.html. */
if (ret < 0) {
pr_err("%s: sched_setaffinity fail:%d.\n", __func__, ret);
}
pr_debug("%s: sched_setaffinity cpu:%d.\n", __func__, cpu);
}
while (!kthread_should_stop()) {
int split_fail = 0;
int split_parent;
int split_child;
/* For kthread_should_stop, see http://blog.csdn.net/angle_birds/article/details/8206091 (the tireless kernel thread, kthread). */
wait_event_interruptible(thread_waitq, find_task(&tsk, curr_thread_id));
if (!tsk) {
pr_err("thread:%d can not find task.\n", curr_thread_id);
continue;
}
/* note: other threads run split child task */
split_parent = need_split(tsk) && !tsk->parent;
split_child = need_split(tsk) && tsk->parent;
if (split_parent) {
if ((tsk->set.split_mode == RL_SPLIT) ||
(tsk->set.split_mode == UD_SPLIT))
size = 2;
else
size = 4;
ret = queue_split_task(tsk, sp_task, size);
if (ret < 0) {
split_fail = 1;
} else {
struct list_head *pos;
spin_lock_irqsave(&ipu_task_list_lock, flags);
sp_tsk0 = list_first_entry(&tsk->split_list,
struct ipu_task_entry, node);
list_del(&sp_tsk0->node);
list_for_each(pos, &tsk->split_list) {
struct ipu_task_entry *tmp;
tmp = list_entry(pos,
struct ipu_task_entry, node);
tmp->task_in_list = 1;
dev_dbg(tmp->dev,
"[0x%p] no-0x%x,id:%d sp_tsk "
"add_to_list.\n", tmp,
tmp->task_no, tmp->task_id);
}
/* add to global list */
list_splice(&tsk->split_list, &ipu_task_list);
spin_unlock_irqrestore(&ipu_task_list_lock,
flags);
/* let the parent thread do the first sp_task */
/* FIXME: ensure the correct sequence for split
4size: 5/6->9/a*/
if (!sp_tsk0)
dev_err(tsk->dev,
"ERR: no-0x%x,can not get split_tsk0\n",
tsk->task_no);
wake_up_interruptible(&thread_waitq);
get_res_do_task(sp_tsk0);
dev_dbg(sp_tsk0->dev,
"thread:%d complete tsk no:0x%x.\n",
curr_thread_id, sp_tsk0->task_no);
ret = atomic_read(&req_cnt);
if (ret > 0) {
wake_up(&res_waitq);
dev_dbg(sp_tsk0->dev,
"sp_tsk0 sche thread:%d no:0x%x,"
"req_cnt:%d\n", curr_thread_id,
sp_tsk0->task_no, ret);
/* For other threads to get_res */
schedule();
}
}
} else
get_res_do_task(tsk);
/* wait for all 4 sp_task finished here or timeout
and then release all resources */
if (split_parent && !split_fail)
wait_split_task_complete(tsk, sp_task, size);
if (!split_child) {
atomic_inc(&tsk->done);
wake_up(&tsk->task_waitq);
}
dev_dbg(tsk->dev, "thread:%d complete tsk no:0x%x-[0x%p].\n",
curr_thread_id, tsk->task_no, tsk);
ret = atomic_read(&req_cnt);
if (ret > 0) {
wake_up(&res_waitq);
dev_dbg(tsk->dev, "sche thread:%d no:0x%x,req_cnt:%d\n",
curr_thread_id, tsk->task_no, ret);
/* note: give cpu to other threads to get_res */
schedule();
}
kref_put(&tsk->refcount, task_mem_free);
}
pr_info("ERR %s exit.\n", __func__);
return 0;
}
I have not studied kernel threads in any depth, so I only looked up roughly what the individual calls mean. For each dequeued task this thread ultimately calls get_res_do_task, so that function is analyzed next.
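Before moving on, it is worth spelling out the wait-queue handshake that keeps this thread alive: the thread sleeps in wait_event_interruptible(thread_waitq, find_task(...)) until a producer puts a task on the global ipu_task_list and wakes the queue. A minimal sketch of the same pattern, with hypothetical demo_* names standing in for the driver's find_task / ipu_task_list / thread_waitq:

#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

static LIST_HEAD(demo_task_list);
static DEFINE_SPINLOCK(demo_task_list_lock);
static DECLARE_WAIT_QUEUE_HEAD(demo_thread_waitq);

struct demo_task {
    struct list_head node;
};

/* producer side (roughly what queuing a task does): add, then wake */
static void demo_queue_task(struct demo_task *t)
{
    unsigned long flags;

    spin_lock_irqsave(&demo_task_list_lock, flags);
    list_add_tail(&t->node, &demo_task_list);
    spin_unlock_irqrestore(&demo_task_list_lock, flags);
    wake_up_interruptible(&demo_thread_waitq);
}

/* consumer side: the wait condition pops one task, like find_task() */
static bool demo_find_task(struct demo_task **t)
{
    unsigned long flags;

    *t = NULL;
    spin_lock_irqsave(&demo_task_list_lock, flags);
    if (!list_empty(&demo_task_list)) {
        *t = list_first_entry(&demo_task_list, struct demo_task, node);
        list_del(&(*t)->node);
    }
    spin_unlock_irqrestore(&demo_task_list_lock, flags);
    return *t != NULL;
}

/* in the thread loop, this sleeps until demo_find_task() yields a task:
 * wait_event_interruptible(demo_thread_waitq, demo_find_task(&t));
 */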
(III) The get_res_do_task function
static void get_res_do_task(struct ipu_task_entry *t)
{
uint32_t found;
uint32_t split_child;
struct mutex *lock;
found = get_vdoa_ipu_res(t);
if (!found) {
dev_err(t->dev, "ERR:[0x%p] no-0x%x can not get res\n",
t, t->task_no);
return;
} else {
if (t->set.task & VDOA_ONLY)
do_task_vdoa_only(t);
else if ((IPU_PIX_FMT_TILED_NV12F == t->input.format) &&
(t->set.mode & VDOA_BAND_MODE) &&
(t->input.crop.w >
soc_max_vdi_in_width(t->ipu)))
do_task_vdoa_vdi(t);
else
do_task(t);
put_vdoa_ipu_res(t, 0);
}
if (t->state != STATE_OK) {
dev_err(t->dev, "ERR:[0x%p] no-0x%x state: %s\n",
t, t->task_no, state_msg[t->state].msg);
}
split_child = need_split(t) && t->parent;
if (split_child) {
lock = &t->parent->split_lock;
mutex_lock(lock);
t->split_done = 1;
mutex_unlock(lock);
wake_up(&t->parent->split_waitq);
}
return;
}
This function first calls get_vdoa_ipu_res, which I stared at for a long time without fully working out (it appears to acquire the VDOA/IPU hardware resources the task needs). It then picks the execution function: when t->set.task contains VDOA_ONLY it calls do_task_vdoa_only; when VDOA_BAND_MODE is set in t->set.mode (together with a tiled NV12F input whose crop width exceeds the SoC's maximum VDI input width) it calls do_task_vdoa_vdi; in every other case it calls do_task directly.
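The tail of the function is the child-to-parent handshake for split tasks: a child sets split_done under the parent's split_lock and wakes split_waitq, while the parent sleeps on that queue inside wait_split_task_complete (seen earlier in ipu_task_thread). A minimal sketch of the same pattern, with hypothetical demo_* names:

#include <linux/mutex.h>
#include <linux/wait.h>

struct demo_parent {
    struct mutex split_lock;
    wait_queue_head_t split_waitq;
    int split_done;
};

static void demo_parent_init(struct demo_parent *p)
{
    mutex_init(&p->split_lock);
    init_waitqueue_head(&p->split_waitq);
    p->split_done = 0;
}

/* child side: publish completion under the lock, then wake the parent */
static void demo_child_done(struct demo_parent *p)
{
    mutex_lock(&p->split_lock);
    p->split_done = 1;
    mutex_unlock(&p->split_lock);
    wake_up(&p->split_waitq);
}

/* parent side: sleep until the child reports done, or until timeout;
 * returns 0 on timeout, remaining jiffies otherwise */
static long demo_parent_wait(struct demo_parent *p, unsigned long tmo_jiffies)
{
    return wait_event_timeout(p->split_waitq, p->split_done, tmo_jiffies);
}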
(IV) The do_task_vdoa_only function
static void do_task_vdoa_only(struct ipu_task_entry *t)
{
int ret;
ret = init_tiled_ch_bufs(NULL, t);
CHECK_RETCODE(ret < 0, "do_vdoa_only", STATE_ERR, out, ret);
ret = vdoa_start(t->vdoa_handle, VDOA_DEF_TIMEOUT_MS);
vdoa_stop(t->vdoa_handle);
CHECK_RETCODE(ret < 0, "vdoa_wait4complete, do_vdoa_only",
STATE_VDOA_IRQ_TIMEOUT, out, ret);
t->state = STATE_OK;
out:
return;
}
4.1 This function calls init_tiled_ch_bufs to initialize the channel's tiled buffers (I do not yet know how these differ from ordinary buffers). Depending on the format saved in t->input.format it reaches init_tiled_buf, which calls vdoa_setup to program a number of VDOA registers, uses vdoa_get_output_buf to read certain VDOA register values back into the buffer descriptor, and then calls ipu_init_channel_buffer to initialize the buffer.
4.2 It then calls vdoa_start to start the VDOA. That function initializes a completion (init_completion(&vdoa->comp);), enables the irq, and waits on the completion (wait_for_completion_timeout(&vdoa->comp, msecs_to_jiffies(timeout_ms));).
4.3 Afterwards vdoa_stop is called to shut the VDOA down.
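The completion usage in vdoa_start is a standard kernel idiom: the waiter (re)initializes a struct completion and sleeps on it, and the interrupt handler signals it. A minimal sketch under that assumption (the demo_* names and the error handling are illustrative, not the driver's code):

#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>

static struct completion demo_comp;

/* irq handler: signal that the hardware has finished */
static irqreturn_t demo_irq_handler(int irq, void *dev_id)
{
    complete(&demo_comp);
    return IRQ_HANDLED;
}

/* waiter: start the job, then sleep until the irq fires or we time out */
static int demo_wait_done(unsigned int timeout_ms)
{
    unsigned long left;

    init_completion(&demo_comp);
    /* ... kick off the hardware job here ... */
    left = wait_for_completion_timeout(&demo_comp,
                                       msecs_to_jiffies(timeout_ms));
    return left ? 0 : -ETIMEDOUT;  /* 0 jiffies left means timeout */
}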
(V) The do_task_vdoa_vdi function
static void do_task_vdoa_vdi(struct ipu_task_entry *t)
{
int i;
int ret;
u32 stripe_width;
/* FIXME: crop mode not support now */
stripe_width = t->input.width >> 1;
t->input.crop.pos.x = 0;
t->input.crop.pos.y = 0;
t->input.crop.w = stripe_width;
t->input.crop.h = t->input.height;
t->output.crop.w = stripe_width;
t->output.crop.h = t->input.height;
for (i = 0; i < 2; i++) {
t->input.crop.pos.x = t->input.crop.pos.x + i * stripe_width;
t->output.crop.pos.x = t->output.crop.pos.x + i * stripe_width;
/* check input */
ret = set_crop(&t->input.crop, t->input.width, t->input.height,
t->input.format);
if (ret < 0) {
ret = STATE_ERR;
goto done;
} else
update_offset(t->input.format,
t->input.width, t->input.height,
t->input.crop.pos.x,
t->input.crop.pos.y,
&t->set.i_off, &t->set.i_uoff,
&t->set.i_voff, &t->set.istride);
dev_dbg(t->dev, "i_off:0x%x, i_uoff:0x%x, istride:%d.\n",
t->set.i_off, t->set.i_uoff, t->set.istride);
/* check output */
ret = set_crop(&t->output.crop, t->input.width,
t->output.height, t->output.format);
if (ret < 0) {
ret = STATE_ERR;
goto done;
} else
update_offset(t->output.format,
t->output.width, t->output.height,
t->output.crop.pos.x,
t->output.crop.pos.y,
&t->set.o_off, &t->set.o_uoff,
&t->set.o_voff, &t->set.ostride);
dev_dbg(t->dev, "o_off:0x%x, o_uoff:0x%x, ostride:%d.\n",
t->set.o_off, t->set.o_uoff, t->set.ostride);
do_task(t);
}
return;
done:
dev_err(t->dev, "ERR %s set_crop.\n", __func__);
t->state = ret;
return;
}
This function carries the comment "crop mode not support now". It simply splits the input into two half-width stripes, fills in the crop and offset settings for each stripe via set_crop and update_offset, and finally hands each stripe to do_task. The real work therefore happens in do_task, which is examined next.
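To make the stripe arithmetic concrete, take a 1920x1080 input as an example (my numbers, not from the source): stripe_width = 1920 >> 1 = 960, so iteration i = 0 crops at x = 0 and i = 1 crops at x = 960, each stripe being 960 pixels wide and 1080 high. A tiny user-space sketch of the same computation, mirroring the accumulating pos.x update in the loop above:

#include <stdio.h>

int main(void)
{
    unsigned int width = 1920, height = 1080;  /* example frame size */
    unsigned int stripe_width = width >> 1;    /* as in do_task_vdoa_vdi */
    unsigned int x = 0;
    int i;

    for (i = 0; i < 2; i++) {
        x = x + i * stripe_width;  /* same accumulation as input.crop.pos.x */
        printf("stripe %d: pos.x=%u w=%u h=%u\n", i, x, stripe_width, height);
    }
    return 0;  /* prints x=0 for stripe 0, then x=960 for stripe 1 */
}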
(VI) The do_task function
static void do_task(struct ipu_task_entry *t)
{
int r_size;
int irq;
int ret;
uint32_t busy;
struct ipu_soc *ipu = t->ipu;
CHECK_PERF(&t->ts_dotask);
if (!ipu) {
t->state = STATE_NO_IPU;
return;
}
init_completion(&t->irq_comp); // initialize the completion
dev_dbg(ipu->dev, "[0x%p]Do task no:0x%x: id %d\n", (void *)t,
t->task_no, t->task_id);
dump_task_info(t); // print the task info
if (t->set.task & IC_PP) {
t->set.ic_chan = MEM_PP_MEM;
dev_dbg(ipu->dev, "[0x%p]ic channel MEM_PP_MEM\n", (void *)t);
} else if (t->set.task & IC_VF) {
t->set.ic_chan = MEM_PRP_VF_MEM;
dev_dbg(ipu->dev, "[0x%p]ic channel MEM_PRP_VF_MEM\n", (void *)t);
} else if (t->set.task & VDI_VF) {
if (t->set.mode & VDOA_BAND_MODE) {
t->set.ic_chan = MEM_VDI_MEM;
if (deinterlace_3_field(t)) {
t->set.vdi_ic_p_chan = MEM_VDI_MEM_P;
t->set.vdi_ic_n_chan = MEM_VDI_MEM_N;
}
dev_dbg(ipu->dev, "[0x%p]ic ch MEM_VDI_MEM\n",
(void *)t);
} else {
t->set.ic_chan = MEM_VDI_PRP_VF_MEM;
if (deinterlace_3_field(t)) {
t->set.vdi_ic_p_chan = MEM_VDI_PRP_VF_MEM_P;
t->set.vdi_ic_n_chan = MEM_VDI_PRP_VF_MEM_N;
}
dev_dbg(ipu->dev,
"[0x%p]ic ch MEM_VDI_PRP_VF_MEM\n", t);
}
}
/* The above selects t->set.ic_chan (and, for 3-field deinterlacing, t->set.vdi_ic_p_chan and t->set.vdi_ic_n_chan) according to the bits in t->set.task. */
if (t->set.task & ROT_PP) {
t->set.rot_chan = MEM_ROT_PP_MEM;
dev_dbg(ipu->dev, "[0x%p]rot channel MEM_ROT_PP_MEM\n", (void *)t);
} else if (t->set.task & ROT_VF) {
t->set.rot_chan = MEM_ROT_VF_MEM;
dev_dbg(ipu->dev, "[0x%p]rot channel MEM_ROT_VF_MEM\n", (void *)t);
}
/* Set t->set.rot_chan. */
if (t->task_id == IPU_TASK_ID_VF)
busy = ic_vf_pp_is_busy(ipu, true);
else if (t->task_id == IPU_TASK_ID_PP)
busy = ic_vf_pp_is_busy(ipu, false);
else {
dev_err(ipu->dev, "ERR[no:0x%x]ipu task_id:%d invalid!\n",
t->task_no, t->task_id);
return;
}
if (busy) {
dev_err(ipu->dev, "ERR[0x%p-no:0x%x]ipu task_id:%d busy!\n",
(void *)t, t->task_no, t->task_id);
t->state = STATE_IPU_BUSY;
return;
}
irq = get_irq(t);
if (irq < 0) {
t->state = STATE_NO_IRQ;
return;
}
t->irq = irq;
/* Get the interrupt. */
/* channel setup */
if (only_ic(t->set.mode)) {
dev_dbg(t->dev, "[0x%p]only ic mode\n", (void *)t);
ret = init_ic(ipu, t);
CHECK_RETCODE(ret < 0, "init_ic only_ic",
t->state, chan_setup, ret);
} else if (only_rot(t->set.mode)) {
dev_dbg(t->dev, "[0x%p]only rot mode\n", (void *)t);
ret = init_rot(ipu, t);
CHECK_RETCODE(ret < 0, "init_rot only_rot",
t->state, chan_setup, ret);
} else if (ic_and_rot(t->set.mode)) {
int rot_idx = (t->task_id == IPU_TASK_ID_VF) ? 0 : 1;
dev_dbg(t->dev, "[0x%p]ic + rot mode\n", (void *)t);
t->set.r_fmt = t->output.format;
if (t->output.rotate >= IPU_ROTATE_90_RIGHT) {
t->set.r_width = t->output.crop.h;
t->set.r_height = t->output.crop.w;
} else {
t->set.r_width = t->output.crop.w;
t->set.r_height = t->output.crop.h;
}
t->set.r_stride = t->set.r_width *
bytes_per_pixel(t->set.r_fmt);
r_size = PAGE_ALIGN(t->set.r_width * t->set.r_height
* fmt_to_bpp(t->set.r_fmt)/8);
if (r_size > ipu->rot_dma[rot_idx].size) {
dev_dbg(t->dev, "[0x%p]realloc rot buffer\n", (void *)t);
if (ipu->rot_dma[rot_idx].vaddr)
dma_free_coherent(t->dev,
ipu->rot_dma[rot_idx].size,
ipu->rot_dma[rot_idx].vaddr,
ipu->rot_dma[rot_idx].paddr);
ipu->rot_dma[rot_idx].size = r_size;
ipu->rot_dma[rot_idx].vaddr = dma_alloc_coherent(t->dev,
r_size,
&ipu->rot_dma[rot_idx].paddr,
GFP_DMA | GFP_KERNEL);
CHECK_RETCODE(ipu->rot_dma[rot_idx].vaddr == NULL,
"ic_and_rot", STATE_SYS_NO_MEM,
chan_setup, -ENOMEM);
}
t->set.r_paddr = ipu->rot_dma[rot_idx].paddr;
dev_dbg(t->dev, "[0x%p]rotation:\n", (void *)t);
dev_dbg(t->dev, "[0x%p] format = 0x%x\n", (void *)t, t->set.r_fmt);
dev_dbg(t->dev, "[0x%p] width = %d\n", (void *)t, t->set.r_width);
dev_dbg(t->dev, "[0x%p] height = %d\n", (void *)t, t->set.r_height);
dev_dbg(t->dev, "[0x%p] paddr = 0x%x\n", (void *)t, t->set.r_paddr);
dev_dbg(t->dev, "[0x%p] rstride = %d\n", (void *)t, t->set.r_stride);
ret = init_ic(ipu, t);
CHECK_RETCODE(ret < 0, "init_ic ic_and_rot",
t->state, chan_setup, ret);
ret = init_rot(ipu, t);
CHECK_RETCODE(ret < 0, "init_rot ic_and_rot",
t->state, chan_setup, ret);
ret = ipu_link_channels(ipu, t->set.ic_chan,
t->set.rot_chan);
CHECK_RETCODE(ret < 0, "ipu_link_ch ic_and_rot",
STATE_LINK_CHAN_FAIL, chan_setup, ret);
} else {
dev_err(t->dev, "ERR [0x%p]do task: should not be here\n", t);
t->state = STATE_ERR;
return;
}
The above implements the mode selection on t->set.mode. The modes are declared along with struct task_set: IC_MODE, ROT_MODE, VDI_MODE, and the combined IPU_PREPROCESS_MODE_MASK. If only IC-style processing is requested (IC_MODE and VDI_MODE both count; see the definition of only_ic), init_ic alone is called to set up the IC; if only ROT_MODE is set, init_rot alone sets up the rotator; and in the ic_and_rot case the code (re)allocates an intermediate rotation buffer, calls both init_ic and init_rot, and finally links the IC channel to the rotation channel with ipu_link_channels.
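For reference, the three mode predicates used above are small helpers in ipu_device.c. Based on the behaviour just described (IC_MODE and VDI_MODE both counting as "only IC"), they plausibly look like the following; treat this as an assumption to be checked against the actual source, not a verbatim copy:

/* plausible reconstruction of the mode predicates (assumption) */
#define only_ic(mode)     ((mode == IC_MODE) || (mode == VDI_MODE))
#define only_rot(mode)    (mode == ROT_MODE)
#define ic_and_rot(mode)  ((mode == (IC_MODE | ROT_MODE)) || \
                           (mode == (VDI_MODE | ROT_MODE)))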