
7.1 ipu_device.c Analysis (Part 1) --- Flow Analysis

Published 2019-07-12 13:41

The ipu_probe function in ipu_common.c ends by calling register_ipu_device, which lives in ipu_device.c, so the analysis starts from this file. The file mainly implements the execution of two kernel threads. According to FTF-CON-F0119.pdf:

1) Each IPU has two kernel threads, one for each IC task (PP and VF).
2) Each kernel thread runs a task by adding it to its task-queue list.
3) Each task's execution flow is: ipu_init_channel ---> ipu_init_channel_buffer ---> request_ipu_irq ---> ipu_enable_channel ---> wait_irq (task finish) ---> ipu_disable_channel ---> ipu_uninit_channel. From my preliminary reading, some of these function names have since changed; I will leave the list as written for now and look at the details later.
4) Tasks work in single-buffer mode.
5) An application only needs to prepare a task and then add it to the queue.
6) Task operations consist of setting the task's input/output/overlay/rotation/deinterlacing/buffer, then calling the IPU_CHECK_TASK ioctl first and the IPU_QUEUE_TASK ioctl afterwards to put the task on the queue.
In the ipu_gen_init function of ipu_common.c, platform_driver_register(&mxcipu_driver); registers the platform_driver structure mxcipu_driver on the platform bus. Registered as the driver side, it matches devices named ipu*. When a matching device is also registered on that bus, the bus runs its match function and eventually calls the probe function inside mxcipu_driver. Likewise, mxc_v4l2_capture.c calls platform_driver_register(&mxc_v4l2_driver); to register mxc_v4l2_driver as a driver on the platform bus; that driver matches devices named v4l2_cap_*.
So for now I do not see how the two relate; both are simply drivers registered on the platform bus. An ordinary application operates on "/dev/video0", which is the v4l2_cap_* device registered by mxc_v4l2_capture.c and exposes the many V4L2 ioctls. To drive the device registered through ipu_common.c you instead open "/dev/mxc_ipu", which offers only a handful of ioctls, such as IPU_CHECK_TASK and IPU_QUEUE_TASK.
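To make point 6 above concrete, the sketch below checks and then queues a single task on /dev/mxc_ipu. This is a minimal illustration, not tested code: it assumes the struct ipu_task layout and the IPU_PIX_FMT_*/IPU_CHECK_OK constants from the i.MX user header linux/ipu.h, and the two physical buffer addresses are placeholders that a real program would obtain from a DMA-capable allocator.

```c
/* Minimal user-space sketch (not the driver's own test code): validate a
 * task with IPU_CHECK_TASK, then queue it with IPU_QUEUE_TASK. Field names
 * and constants are assumed to come from the i.MX header linux/ipu.h. */
#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/ipu.h>

int run_one_ipu_task(unsigned long in_paddr, unsigned long out_paddr)
{
	struct ipu_task task;
	int fd, ret;

	memset(&task, 0, sizeof(task));
	task.input.width   = 640;
	task.input.height  = 480;
	task.input.format  = IPU_PIX_FMT_YUV420P;
	task.input.paddr   = in_paddr;      /* placeholder physical address */
	task.output.width  = 1024;
	task.output.height = 768;
	task.output.format = IPU_PIX_FMT_RGB565;
	task.output.paddr  = out_paddr;     /* placeholder physical address */

	fd = open("/dev/mxc_ipu", O_RDWR);
	if (fd < 0)
		return -1;

	/* IPU_CHECK_TASK validates (and may adjust) the task settings. */
	ret = ioctl(fd, IPU_CHECK_TASK, &task);
	if (ret == IPU_CHECK_OK)
		ret = ioctl(fd, IPU_QUEUE_TASK, &task);

	close(fd);
	return ret;
}
```

IPU_QUEUE_TASK is synchronous: it blocks (up to the task's timeout) until one of the two kernel threads described below has executed the task.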
(1) The register_ipu_device function

```c
int register_ipu_device(struct ipu_soc *ipu, int id)
{
	int ret = 0;
	static int idx;
	static struct ipu_thread_data thread_data[5];

	if (!major) {
		major = register_chrdev(0, "mxc_ipu", &mxc_ipu_fops);
		if (major < 0) {
			printk(KERN_ERR "Unable to register mxc_ipu as a char device\n");
			ret = major;
			goto register_cdev_fail;
		}
		/* Register the char device; essentially this hooks the
		 * mxc_ipu_fops structure into the kernel. */

		ipu_class = class_create(THIS_MODULE, "mxc_ipu");
		if (IS_ERR(ipu_class)) {
			ret = PTR_ERR(ipu_class);
			goto ipu_class_fail;
		}
		/* Create the class. */

		ipu_dev = device_create(ipu_class, NULL, MKDEV(major, 0),
				NULL, "mxc_ipu");
		if (IS_ERR(ipu_dev)) {
			ret = PTR_ERR(ipu_dev);
			goto dev_create_fail;
		}
		/* Create the device node. */

		ipu_dev->dma_mask = kmalloc(sizeof(*ipu_dev->dma_mask), GFP_KERNEL);
		*ipu_dev->dma_mask = DMA_BIT_MASK(32);
		ipu_dev->coherent_dma_mask = DMA_BIT_MASK(32);
		/* Allocate memory for the dma_mask member of
		 * struct device *ipu_dev, then set it. */

		mutex_init(&ipu_ch_tbl.lock);
	}
	max_ipu_no = ++id;
	ipu->rot_dma[0].size = 0;
	ipu->rot_dma[1].size = 0;
	/* Set max_ipu_no from the id argument that ipu_probe in
	 * ipu_common.c passes in (not analyzed yet); roughly, it is the
	 * number of IPUs found. */

	thread_data[idx].ipu = ipu;
	thread_data[idx].id = 0;
	thread_data[idx].is_vdoa = 0;
	ipu->thread[0] = kthread_run(ipu_task_thread, &thread_data[idx++],
			"ipu%d_task", id);
	if (IS_ERR(ipu->thread[0])) {
		ret = PTR_ERR(ipu->thread[0]);
		goto kthread0_fail;
	}
```

idx is a local variable of this function, and the only thing above is the declaration static int idx; with no explicit initialization. Is that a bug? No: a static local variable is zero-initialized by the C language, so idx starts at 0, and this code fills in the ipu, id and is_vdoa members of thread_data[0].

Looking at struct ipu_soc, it contains the member struct task_struct *thread[2]; — the structure provides two kernel threads for us to use. Here kthread_run creates and starts the first of them. For background on kernel threads, see the articles 《Linux内核kthread_run函数理解学习》 and 《运行不息的内核线程kthread》.

```c
	thread_data[idx].ipu = ipu;
	thread_data[idx].id = 1;
	thread_data[idx].is_vdoa = 0;
	ipu->thread[1] = kthread_run(ipu_task_thread, &thread_data[idx++],
			"ipu%d_task", id);
	if (IS_ERR(ipu->thread[1])) {
		ret = PTR_ERR(ipu->thread[1]);
		goto kthread1_fail;
	}
```

Here the second kernel thread is created and started; both threads run the same function, ipu_task_thread.

```c
	return ret;

kthread1_fail:
	kthread_stop(ipu->thread[0]);
kthread0_fail:
	if (id == 0)
		device_destroy(ipu_class, MKDEV(major, 0));
dev_create_fail:
	if (id == 0) {
		class_destroy(ipu_class);
	}
ipu_class_fail:
	if (id == 0)
		unregister_chrdev(major, "mxc_ipu");
register_cdev_fail:
	return ret;
}
```

Note how the error path unwinds: each label undoes exactly the steps that succeeded before the failure, in reverse order, which is the usual kernel goto-cleanup idiom.
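Since kthread_run and kthread_stop do all the thread management here, a minimal self-contained sketch of that lifecycle may help; my_thread_fn, worker and "my_worker%d" are invented names, not part of the driver.

```c
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/err.h>

/* Illustrative sketch of the kthread_run/kthread_stop pattern. */
static int my_thread_fn(void *data)
{
	while (!kthread_should_stop()) {
		/* do one unit of work, or sleep waiting for it */
		msleep(100);
	}
	return 0;		/* value handed back to kthread_stop() */
}

static struct task_struct *worker;

static int start_worker(void)
{
	/* Create and immediately wake the thread, like ipu->thread[0/1]. */
	worker = kthread_run(my_thread_fn, NULL, "my_worker%d", 0);
	if (IS_ERR(worker))
		return PTR_ERR(worker);
	return 0;
}

static void stop_worker(void)
{
	/* Sets the should-stop flag, wakes the thread, waits for exit. */
	kthread_stop(worker);
}
```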
(2) Next, into the ipu_task_thread function:

```c
static int ipu_task_thread(void *argv)
{
	struct ipu_task_entry *tsk;
	struct ipu_task_entry *sp_tsk0;
	struct ipu_split_task sp_task[4];
	/* priority lower than irq_thread */
	const struct sched_param param = {
		.sched_priority = MAX_USER_RT_PRIO/2 - 1,
	};
	int ret;
	int curr_thread_id;
	uint32_t size;
	unsigned long flags;
	unsigned int cpu;
	struct cpumask cpu_mask;
	struct ipu_thread_data *data = (struct ipu_thread_data *)argv;

	thread_id++;
	curr_thread_id = thread_id;
	sched_setscheduler(current, SCHED_FIFO, &param);
	/* For sched_setscheduler, see
	 * http://blog.csdn.net/allwtg/article/details/5254306
	 * (sched_setscheduler用法). */

	if (!data->is_vdoa) {
		cpu = cpumask_first(cpu_online_mask);
		cpumask_set_cpu(cpu, &cpu_mask);
		/* For cpumask, see
		 * http://blog.csdn.net/nirenxiaoxiao/article/details/21462053
		 * (Linux cpumask分析). */
		ret = sched_setaffinity(data->ipu->thread[data->id]->pid,
				&cpu_mask);
		/* For CPU affinity and the pair
		 * sched_setaffinity()/sched_getaffinity(), see
		 * http://www.cnblogs.com/visayafan/archive/2011/12/10/2283375.html */
		if (ret < 0) {
			pr_err("%s: sched_setaffinity fail:%d.\n", __func__, ret);
		}
		pr_debug("%s: sched_setaffinity cpu:%d.\n", __func__, cpu);
	}

	while (!kthread_should_stop()) {
		int split_fail = 0;
		int split_parent;
		int split_child;

		/* For kthread_should_stop, see
		 * http://blog.csdn.net/angle_birds/article/details/8206091
		 * (运行不息的内核线程kthread). */
		wait_event_interruptible(thread_waitq,
				find_task(&tsk, curr_thread_id));

		if (!tsk) {
			pr_err("thread:%d can not find task.\n",
				curr_thread_id);
			continue;
		}

		/* note: other threads run split child task */
		split_parent = need_split(tsk) && !tsk->parent;
		split_child = need_split(tsk) && tsk->parent;
		if (split_parent) {
			if ((tsk->set.split_mode == RL_SPLIT) ||
				 (tsk->set.split_mode == UD_SPLIT))
				size = 2;
			else
				size = 4;
			ret = queue_split_task(tsk, sp_task, size);
			if (ret < 0) {
				split_fail = 1;
			} else {
				struct list_head *pos;

				spin_lock_irqsave(&ipu_task_list_lock, flags);

				sp_tsk0 = list_first_entry(&tsk->split_list,
						struct ipu_task_entry, node);
				list_del(&sp_tsk0->node);

				list_for_each(pos, &tsk->split_list) {
					struct ipu_task_entry *tmp;

					tmp = list_entry(pos,
						struct ipu_task_entry, node);
					tmp->task_in_list = 1;
					dev_dbg(tmp->dev,
						"[0x%p] no-0x%x,id:%d sp_tsk "
						"add_to_list.\n", tmp,
						tmp->task_no, tmp->task_id);
				}
				/* add to global list */
				list_splice(&tsk->split_list, &ipu_task_list);

				spin_unlock_irqrestore(&ipu_task_list_lock,
									flags);
				/* let the parent thread do the first sp_task */
				/* FIXME: ensure the correct sequence for split
				   4size: 5/6->9/a*/
				if (!sp_tsk0)
					dev_err(tsk->dev,
					"ERR: no-0x%x,can not get split_tsk0\n",
					tsk->task_no);
				wake_up_interruptible(&thread_waitq);
				get_res_do_task(sp_tsk0);
				dev_dbg(sp_tsk0->dev,
					"thread:%d complete tsk no:0x%x.\n",
					curr_thread_id, sp_tsk0->task_no);
				ret = atomic_read(&req_cnt);
				if (ret > 0) {
					wake_up(&res_waitq);
					dev_dbg(sp_tsk0->dev,
					"sp_tsk0 sche thread:%d no:0x%x,"
					"req_cnt:%d\n", curr_thread_id,
					sp_tsk0->task_no, ret);
					/* For other threads to get_res */
					schedule();
				}
			}
		} else
			get_res_do_task(tsk);

		/* wait for all 4 sp_task finished here or timeout
			and then release all resources */
		if (split_parent && !split_fail)
			wait_split_task_complete(tsk, sp_task, size);

		if (!split_child) {
			atomic_inc(&tsk->done);
			wake_up(&tsk->task_waitq);
		}

		dev_dbg(tsk->dev, "thread:%d complete tsk no:0x%x-[0x%p].\n",
				curr_thread_id, tsk->task_no, tsk);
		ret = atomic_read(&req_cnt);
		if (ret > 0) {
			wake_up(&res_waitq);
			dev_dbg(tsk->dev, "sche thread:%d no:0x%x,req_cnt:%d\n",
				curr_thread_id, tsk->task_no, ret);
			/* note: give cpu to other threads to get_res */
			schedule();
		}

		kref_put(&tsk->refcount, task_mem_free);
	}

	pr_info("ERR %s exit.\n", __func__);

	return 0;
}
```
I have not studied the kernel-thread material in depth, so I only skimmed what these helper functions do. The function this thread ultimately ends up calling is get_res_do_task, which is analyzed next.
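The core of the loop above is the waitqueue handshake: the thread sleeps in wait_event_interruptible until the predicate (find_task, which also dequeues the task) succeeds, and producers wake it through the same queue. Below is a generic sketch of that pattern under invented names (my_waitq, my_list, take_item); the driver's find_task plays the role of take_item.

```c
#include <linux/kthread.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/types.h>
#include <linux/wait.h>

/* Hypothetical consumer/producer pair using the same pattern as
 * ipu_task_thread's thread_waitq + find_task. */
static DECLARE_WAIT_QUEUE_HEAD(my_waitq);
static LIST_HEAD(my_list);
static DEFINE_SPINLOCK(my_lock);

struct my_item {
	struct list_head node;
};

static bool take_item(struct my_item **out)
{
	unsigned long flags;

	*out = NULL;
	spin_lock_irqsave(&my_lock, flags);
	if (!list_empty(&my_list)) {
		*out = list_first_entry(&my_list, struct my_item, node);
		list_del(&(*out)->node);
	}
	spin_unlock_irqrestore(&my_lock, flags);
	return *out != NULL;
}

static int consumer_loop(void *unused)
{
	struct my_item *item;

	while (!kthread_should_stop()) {
		/* Sleep until take_item() succeeds; the predicate is
		 * re-evaluated on every wake-up, so spurious wake-ups and
		 * racing consumers are harmless. */
		wait_event_interruptible(my_waitq, take_item(&item));
		if (item) {
			/* ... process item ... */;
		}
	}
	return 0;
}

static void producer_add(struct my_item *item)
{
	unsigned long flags;

	spin_lock_irqsave(&my_lock, flags);
	list_add_tail(&item->node, &my_list);
	spin_unlock_irqrestore(&my_lock, flags);
	wake_up_interruptible(&my_waitq);	/* kick a sleeping consumer */
}
```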
(3) The get_res_do_task function

```c
static void get_res_do_task(struct ipu_task_entry *t)
{
	uint32_t	found;
	uint32_t	split_child;
	struct mutex	*lock;

	found = get_vdoa_ipu_res(t);
	if (!found) {
		dev_err(t->dev, "ERR:[0x%p] no-0x%x can not get res\n",
			t, t->task_no);
		return;
	} else {
		if (t->set.task & VDOA_ONLY)
			do_task_vdoa_only(t);
		else if ((IPU_PIX_FMT_TILED_NV12F == t->input.format) &&
				(t->set.mode & VDOA_BAND_MODE) &&
				(t->input.crop.w > soc_max_vdi_in_width(t->ipu)))
			do_task_vdoa_vdi(t);
		else
			do_task(t);

		put_vdoa_ipu_res(t, 0);
	}
	if (t->state != STATE_OK) {
		dev_err(t->dev, "ERR:[0x%p] no-0x%x state: %s\n",
			t, t->task_no, state_msg[t->state].msg);
	}

	split_child = need_split(t) && t->parent;
	if (split_child) {
		lock = &t->parent->split_lock;
		mutex_lock(lock);
		t->split_done = 1;
		mutex_unlock(lock);
		wake_up(&t->parent->split_waitq);
	}

	return;
}
```
This function first calls get_vdoa_ipu_res; I stared at that function for a long time without figuring out what it is meant to do. It then picks an execution function based on the task settings: when t->set.task has VDOA_ONLY set, it calls do_task_vdoa_only; when the input format is tiled NV12F, VDOA_BAND_MODE is set and the input crop is wider than the VDI input limit, it calls do_task_vdoa_vdi; in every other case it calls do_task directly.
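The tail of get_res_do_task is the child half of a parent/child handshake: a split child sets its done flag under the parent's split_lock and wakes the parent's split_waitq, while the parent sleeps (in wait_split_task_complete, seen in the thread loop earlier) until all children have reported. A schematic sketch of both halves, with invented names and a simplified flag array:

```c
#include <linux/jiffies.h>
#include <linux/mutex.h>
#include <linux/types.h>
#include <linux/wait.h>

/* Schematic parent/child completion handshake, mirroring the
 * split_done/split_waitq logic; names and layout are illustrative. */
struct split_parent {
	struct mutex lock;
	wait_queue_head_t waitq;
	int done[4];		/* one flag per split child */
	int nr_children;
};

static void child_report_done(struct split_parent *p, int idx)
{
	mutex_lock(&p->lock);
	p->done[idx] = 1;	/* like t->split_done = 1 */
	mutex_unlock(&p->lock);
	wake_up(&p->waitq);	/* like wake_up(&t->parent->split_waitq) */
}

static bool all_children_done(struct split_parent *p)
{
	int i, all = 1;

	mutex_lock(&p->lock);
	for (i = 0; i < p->nr_children; i++)
		all &= p->done[i];
	mutex_unlock(&p->lock);
	return all;
}

static long parent_wait(struct split_parent *p, unsigned long timeout_ms)
{
	/* Returns 0 on timeout, >0 if all children finished in time. */
	return wait_event_timeout(p->waitq, all_children_done(p),
				  msecs_to_jiffies(timeout_ms));
}
```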
(4) The do_task_vdoa_only function

```c
static void do_task_vdoa_only(struct ipu_task_entry *t)
{
	int ret;

	ret = init_tiled_ch_bufs(NULL, t);
	CHECK_RETCODE(ret < 0, "do_vdoa_only", STATE_ERR, out, ret);

	ret = vdoa_start(t->vdoa_handle, VDOA_DEF_TIMEOUT_MS);
	vdoa_stop(t->vdoa_handle);
	CHECK_RETCODE(ret < 0, "vdoa_wait4complete, do_vdoa_only",
			STATE_VDOA_IRQ_TIMEOUT, out, ret);

	t->state = STATE_OK;
out:
	return;
}
```
4.1 This function calls init_tiled_ch_bufs to initialize the channel's tiled buffers (I do not yet know how tiled buffers differ from ordinary ones). Depending on the format held in t->input.format, it reaches init_tiled_buf, which calls vdoa_setup to program some of the VDOA registers, uses vdoa_get_output_buf to read certain VDOA register values into the buffer description, and then calls ipu_init_channel_buffer to initialize the buffer.
4.2 It then calls vdoa_start to start the VDOA: that function initializes a completion (init_completion(&vdoa->comp);), enables the irq, and then waits on the completion (wait_for_completion_timeout(&vdoa->comp, msecs_to_jiffies(timeout_ms));).
4.3 Finally, vdoa_stop is called to wind the VDOA down.
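The vdoa_start flow in 4.2 is the standard kick-the-hardware, sleep-on-a-completion, complete-from-the-IRQ-handler pattern. A minimal hypothetical sketch (my_hw, my_irq_handler and my_hw_run are invented names; the real vdoa code differs in detail):

```c
#include <linux/completion.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/jiffies.h>

/* Hypothetical device context; only the completion matters here. */
struct my_hw {
	struct completion comp;
};

static irqreturn_t my_irq_handler(int irq, void *dev_id)
{
	struct my_hw *hw = dev_id;

	/* ... acknowledge/clear the hardware interrupt here ... */
	complete(&hw->comp);		/* wake the waiter in my_hw_run() */
	return IRQ_HANDLED;
}

static int my_hw_run(struct my_hw *hw, unsigned int timeout_ms)
{
	unsigned long left;

	init_completion(&hw->comp);	/* arm the completion first */
	/* ... start the hardware job here ... */

	left = wait_for_completion_timeout(&hw->comp,
					   msecs_to_jiffies(timeout_ms));
	return left ? 0 : -ETIMEDOUT;	/* 0 jiffies left means timeout */
}
```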
(5) The do_task_vdoa_vdi function

```c
static void do_task_vdoa_vdi(struct ipu_task_entry *t)
{
	int i;
	int ret;
	u32 stripe_width;

	/* FIXME: crop mode not support now */
	stripe_width = t->input.width >> 1;
	t->input.crop.pos.x = 0;
	t->input.crop.pos.y = 0;
	t->input.crop.w = stripe_width;
	t->input.crop.h = t->input.height;
	t->output.crop.w = stripe_width;
	t->output.crop.h = t->input.height;

	for (i = 0; i < 2; i++) {
		t->input.crop.pos.x = t->input.crop.pos.x + i * stripe_width;
		t->output.crop.pos.x = t->output.crop.pos.x + i * stripe_width;

		/* check input */
		ret = set_crop(&t->input.crop, t->input.width,
			t->input.height, t->input.format);
		if (ret < 0) {
			ret = STATE_ERR;
			goto done;
		} else
			update_offset(t->input.format,
					t->input.width, t->input.height,
					t->input.crop.pos.x,
					t->input.crop.pos.y,
					&t->set.i_off, &t->set.i_uoff,
					&t->set.i_voff, &t->set.istride);
		dev_dbg(t->dev, "i_off:0x%x, i_uoff:0x%x, istride:%d.\n",
				t->set.i_off, t->set.i_uoff, t->set.istride);

		/* check output */
		ret = set_crop(&t->output.crop, t->input.width,
				t->output.height, t->output.format);
		if (ret < 0) {
			ret = STATE_ERR;
			goto done;
		} else
			update_offset(t->output.format,
					t->output.width, t->output.height,
					t->output.crop.pos.x,
					t->output.crop.pos.y,
					&t->set.o_off, &t->set.o_uoff,
					&t->set.o_voff, &t->set.ostride);

		dev_dbg(t->dev, "o_off:0x%x, o_uoff:0x%x, ostride:%d.\n",
				t->set.o_off, t->set.o_uoff, t->set.ostride);

		do_task(t);
	}

	return;
done:
	dev_err(t->dev, "ERR %s set_crop.\n", __func__);
	t->state = ret;
	return;
}
```
This function carries the comment "crop mode not support now": it simply splits the frame down the middle into two stripes, fixes up the crop fields for each, and calls do_task once per stripe. For example, with a 1920-pixel-wide input, stripe_width is 960; the first pass processes the stripe at x = 0 and the second the stripe at x = 960. The real work therefore happens in do_task, which is examined next.
(6) The do_task function

```c
static void do_task(struct ipu_task_entry *t)
{
	int r_size;
	int irq;
	int ret;
	uint32_t busy;
	struct ipu_soc *ipu = t->ipu;

	CHECK_PERF(&t->ts_dotask);

	if (!ipu) {
		t->state = STATE_NO_IPU;
		return;
	}

	init_completion(&t->irq_comp);		/* initialize the completion */
	dev_dbg(ipu->dev, "[0x%p]Do task no:0x%x: id %d\n", (void *)t,
		 t->task_no, t->task_id);
	dump_task_info(t);			/* print the task information */

	if (t->set.task & IC_PP) {
		t->set.ic_chan = MEM_PP_MEM;
		dev_dbg(ipu->dev, "[0x%p]ic channel MEM_PP_MEM\n", (void *)t);
	} else if (t->set.task & IC_VF) {
		t->set.ic_chan = MEM_PRP_VF_MEM;
		dev_dbg(ipu->dev, "[0x%p]ic channel MEM_PRP_VF_MEM\n", (void *)t);
	} else if (t->set.task & VDI_VF) {
		if (t->set.mode & VDOA_BAND_MODE) {
			t->set.ic_chan = MEM_VDI_MEM;
			if (deinterlace_3_field(t)) {
				t->set.vdi_ic_p_chan = MEM_VDI_MEM_P;
				t->set.vdi_ic_n_chan = MEM_VDI_MEM_N;
			}
			dev_dbg(ipu->dev, "[0x%p]ic ch MEM_VDI_MEM\n",
					(void *)t);
		} else {
			t->set.ic_chan = MEM_VDI_PRP_VF_MEM;
			if (deinterlace_3_field(t)) {
				t->set.vdi_ic_p_chan = MEM_VDI_PRP_VF_MEM_P;
				t->set.vdi_ic_n_chan = MEM_VDI_PRP_VF_MEM_N;
			}
			dev_dbg(ipu->dev, "[0x%p]ic ch MEM_VDI_PRP_VF_MEM\n", t);
		}
	}
```
The code above chooses t->set.ic_chan, t->set.vdi_ic_p_chan and t->set.vdi_ic_n_chan according to the flags in t->set.task.

```c
	if (t->set.task & ROT_PP) {
		t->set.rot_chan = MEM_ROT_PP_MEM;
		dev_dbg(ipu->dev, "[0x%p]rot channel MEM_ROT_PP_MEM\n", (void *)t);
	} else if (t->set.task & ROT_VF) {
		t->set.rot_chan = MEM_ROT_VF_MEM;
		dev_dbg(ipu->dev, "[0x%p]rot channel MEM_ROT_VF_MEM\n", (void *)t);
	}
```
Similarly, this sets t->set.rot_chan.

```c
	if (t->task_id == IPU_TASK_ID_VF)
		busy = ic_vf_pp_is_busy(ipu, true);
	else if (t->task_id == IPU_TASK_ID_PP)
		busy = ic_vf_pp_is_busy(ipu, false);
	else {
		dev_err(ipu->dev, "ERR[no:0x%x]ipu task_id:%d invalid!\n",
				t->task_no, t->task_id);
		return;
	}
	if (busy) {
		dev_err(ipu->dev, "ERR[0x%p-no:0x%x]ipu task_id:%d busy!\n",
				(void *)t, t->task_no, t->task_id);
		t->state = STATE_IPU_BUSY;
		return;
	}

	irq = get_irq(t);
	if (irq < 0) {
		t->state = STATE_NO_IRQ;
		return;
	}
	t->irq = irq;
```
After checking that the task's IC is not busy, the interrupt is obtained.

```c
	/* channel setup */
	if (only_ic(t->set.mode)) {
		dev_dbg(t->dev, "[0x%p]only ic mode\n", (void *)t);
		ret = init_ic(ipu, t);
		CHECK_RETCODE(ret < 0, "init_ic only_ic",
				t->state, chan_setup, ret);
	} else if (only_rot(t->set.mode)) {
		dev_dbg(t->dev, "[0x%p]only rot mode\n", (void *)t);
		ret = init_rot(ipu, t);
		CHECK_RETCODE(ret < 0, "init_rot only_rot",
				t->state, chan_setup, ret);
	} else if (ic_and_rot(t->set.mode)) {
		int rot_idx = (t->task_id == IPU_TASK_ID_VF) ? 0 : 1;

		dev_dbg(t->dev, "[0x%p]ic + rot mode\n", (void *)t);
		t->set.r_fmt = t->output.format;
		if (t->output.rotate >= IPU_ROTATE_90_RIGHT) {
			t->set.r_width = t->output.crop.h;
			t->set.r_height = t->output.crop.w;
		} else {
			t->set.r_width = t->output.crop.w;
			t->set.r_height = t->output.crop.h;
		}
		t->set.r_stride = t->set.r_width *
			bytes_per_pixel(t->set.r_fmt);
		r_size = PAGE_ALIGN(t->set.r_width * t->set.r_height
			* fmt_to_bpp(t->set.r_fmt)/8);

		if (r_size > ipu->rot_dma[rot_idx].size) {
			dev_dbg(t->dev, "[0x%p]realloc rot buffer\n", (void *)t);
			if (ipu->rot_dma[rot_idx].vaddr)
				dma_free_coherent(t->dev,
					ipu->rot_dma[rot_idx].size,
					ipu->rot_dma[rot_idx].vaddr,
					ipu->rot_dma[rot_idx].paddr);
			ipu->rot_dma[rot_idx].size = r_size;
			ipu->rot_dma[rot_idx].vaddr = dma_alloc_coherent(t->dev,
					r_size,
					&ipu->rot_dma[rot_idx].paddr,
					GFP_DMA | GFP_KERNEL);
			CHECK_RETCODE(ipu->rot_dma[rot_idx].vaddr == NULL,
					"ic_and_rot", STATE_SYS_NO_MEM,
					chan_setup, -ENOMEM);
		}
		t->set.r_paddr = ipu->rot_dma[rot_idx].paddr;

		dev_dbg(t->dev, "[0x%p]rotation:\n", (void *)t);
		dev_dbg(t->dev, "[0x%p]\tformat = 0x%x\n", (void *)t, t->set.r_fmt);
		dev_dbg(t->dev, "[0x%p]\twidth = %d\n", (void *)t, t->set.r_width);
		dev_dbg(t->dev, "[0x%p]\theight = %d\n", (void *)t, t->set.r_height);
		dev_dbg(t->dev, "[0x%p]\tpaddr = 0x%x\n", (void *)t, t->set.r_paddr);
		dev_dbg(t->dev, "[0x%p]\trstride = %d\n", (void *)t, t->set.r_stride);

		ret = init_ic(ipu, t);
		CHECK_RETCODE(ret < 0, "init_ic ic_and_rot",
				t->state, chan_setup, ret);
		ret = init_rot(ipu, t);
		CHECK_RETCODE(ret < 0, "init_rot ic_and_rot",
				t->state, chan_setup, ret);
		ret = ipu_link_channels(ipu, t->set.ic_chan,
				t->set.rot_chan);
		CHECK_RETCODE(ret < 0, "ipu_link_ch ic_and_rot",
				STATE_LINK_CHAN_FAIL, chan_setup, ret);
	} else {
		dev_err(t->dev, "ERR [0x%p]do task: should not be here\n", t);
		t->state = STATE_ERR;
		return;
	}
```

The code above handles the mode selection held in t->set.mode. These modes are declared for struct task_set and include IC_MODE, ROT_MODE, VDI_MODE and IPU_PREPROCESS_MODE_MASK. If only IC-style processing is requested (both IC_MODE and VDI_MODE count; see the definition of only_ic), init_ic is called to initialize the IC; if only ROT_MODE is requested, init_rot initializes the rotator; and in the ic_and_rot case, init_ic and init_rot are both called and the two channels are then linked with ipu_link_channels.
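The rotation-buffer handling in the ic_and_rot branch is a grow-on-demand coherent DMA buffer: it frees the old buffer and allocates a larger one only when the new frame needs more space, and otherwise reuses what is cached in ipu->rot_dma[]. A stripped-down sketch of the same idiom under invented names (struct dma_buf_cache, grow_dma_buf):

```c
#include <linux/device.h>
#include <linux/dma-mapping.h>
#include <linux/mm.h>		/* PAGE_ALIGN */

/* Illustrative cached coherent buffer, like ipu->rot_dma[idx]. */
struct dma_buf_cache {
	size_t size;
	void *vaddr;
	dma_addr_t paddr;
};

/* Reallocate only when the requested size outgrows the cached buffer. */
static int grow_dma_buf(struct device *dev, struct dma_buf_cache *buf,
			size_t needed)
{
	needed = PAGE_ALIGN(needed);
	if (needed <= buf->size)
		return 0;	/* current buffer is big enough; reuse it */

	if (buf->vaddr)
		dma_free_coherent(dev, buf->size, buf->vaddr, buf->paddr);

	buf->size = needed;
	buf->vaddr = dma_alloc_coherent(dev, needed, &buf->paddr,
					GFP_DMA | GFP_KERNEL);
	if (!buf->vaddr) {
		buf->size = 0;
		return -ENOMEM;
	}
	return 0;
}
```

Caching the buffer this way avoids an allocate/free round-trip on every frame when consecutive tasks share the same rotation size.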