CPUFreq子系统位于drivers/cpufreq目录下,负责在系统运行过程中对CPU的频率和电压进行动态调整,即DVFS(Dynamic Voltage Frequency Scaling,动态电压频率调整)。运行时进行CPU电压和频率调整的原因是:CMOS电路的功耗与电压的平方成正比、与频率成正比,因此降低电压和频率可降低功耗。
CPUFreq的核心层位于drivers/cpufreq/cpufreq.c下,它为各个SoC的CPUFreq驱动的实现提供了一套统一的接口,并实现了一套notifier机制,可以在CPUFreq的策略和频率改变的时候向其他模块发出通知。另外,在CPU运行频率发生变化的时候,内核的loops_per_jiffy常数也会发生相应的变化。
SoC的CPUFreq驱动实现
每个SoC的具体CPUFreq驱动实例只需要实现电压、频率表,以及从硬件层面完成这些变化。
CPUFreq核心层提供了如下API以供SoC注册自身的CPUFreq驱动:
int cpufreq_register_driver(struct cpufreq_driver *driver_data)
其参数为一个cpufreq_driver结构体指针,实际上,cpufreq_driver封装了一个具体的SoC的CPUFreq驱动的主体,该结构体代码如下:
/*
 * Description of one SoC-specific CPUFreq driver, as handed to
 * cpufreq_register_driver().
 *
 * cpufreq_register_driver() rejects a driver unless ->init() and ->verify()
 * are set, one of ->setpolicy() or ->target()/->target_index() is set (but
 * not ->setpolicy() together with either target callback), and
 * ->get_intermediate()/->target_intermediate() are both set or both unset.
 */
struct cpufreq_driver {
	char		name[CPUFREQ_NAME_LEN];
	u8		flags;
	void		*driver_data;

	/* needed by all drivers */
	int		(*init)(struct cpufreq_policy *policy);
	int		(*verify)(struct cpufreq_policy *policy);

	/* define one out of two */
	int		(*setpolicy)(struct cpufreq_policy *policy);

	/*
	 * On failure, should always restore frequency to policy->restore_freq
	 * (i.e. old freq).
	 */
	int		(*target)(struct cpufreq_policy *policy,
				  unsigned int target_freq,
				  unsigned int relation);	/* Deprecated */
	int		(*target_index)(struct cpufreq_policy *policy,
					unsigned int index);
	unsigned int	(*fast_switch)(struct cpufreq_policy *policy,
				       unsigned int target_freq);

	/*
	 * Only for drivers with target_index() and CPUFREQ_ASYNC_NOTIFICATION
	 * unset.
	 *
	 * get_intermediate should return a stable intermediate frequency
	 * platform wants to switch to and target_intermediate() should set CPU
	 * to that frequency, before jumping to the frequency corresponding
	 * to 'index'. Core will take care of sending notifications and driver
	 * doesn't have to handle them in target_intermediate() or
	 * target_index().
	 *
	 * Drivers can return '0' from get_intermediate() in case they don't
	 * wish to switch to intermediate frequency for some target frequency.
	 * In that case core will directly call ->target_index().
	 */
	unsigned int	(*get_intermediate)(struct cpufreq_policy *policy,
					    unsigned int index);
	int		(*target_intermediate)(struct cpufreq_policy *policy,
					       unsigned int index);

	/* should be defined, if possible */
	unsigned int	(*get)(unsigned int cpu);

	/* optional */
	int		(*bios_limit)(int cpu, unsigned int *limit);

	int		(*exit)(struct cpufreq_policy *policy);
	void		(*stop_cpu)(struct cpufreq_policy *policy);
	int		(*suspend)(struct cpufreq_policy *policy);
	int		(*resume)(struct cpufreq_policy *policy);

	/* Will be called after the driver is fully initialized */
	void		(*ready)(struct cpufreq_policy *policy);

	struct freq_attr **attr;

	/* platform specific boost support code */
	bool		boost_enabled;
	int		(*set_boost)(int state);
};
其中name成员是CPUFreq驱动的名字,如drivers/cpufreq/s5pv210-cpufreq.c设置name为s5pv210,drivers/cpufreq/omap-cpufreq.c设置name为omap; init()成员是一个per-CPU初始化函数指针,每当一个新的CPU被注册进系统的时候,该函数就被调用,该函数接受一个cpufreq_policy的指针参数。 verify()成员函数用于对用户的CPUFreq策略设置进行有效性验证和数据修正,每当用户设定一个新策略时,该函数根据老的策略和新的策略,检验新策略设置的有效性并对无效设置进行必要的修正。该成员函数的具体实现常用到如下辅助函数:
static inline void cpufreq_verify_within_limits(struct cpufreq_policy *policy,
unsigned int min, unsigned int max)
setpolicy()成员函数接受一个policy参数,实现了这个成员函数的CPU一般具备在一个范围内自动调整频率的能力。 target()成员函数用于将频率调整到一个指定的值。
cpufreq_register_driver 函数,具体操作内容如下:
/*********************************************************************
* REGISTER / UNREGISTER CPUFREQ DRIVER *
*********************************************************************//**
* cpufreq_register_driver - register a CPU Frequency driver
* @driver_data: A struct cpufreq_driver containing the values#
* submitted by the CPU Frequency driver.
*
* Registers a CPU Frequency driver to this core code. This code
* returns zero on success, -EEXIST when another driver got here first
* (and isn't unregistered in the meantime).
*
*/int cpufreq_register_driver(struct cpufreq_driver *driver_data)
{
unsigned long flags;
int ret;
if (cpufreq_disabled())
return -ENODEV;
if (!driver_data || !driver_data->verify || !driver_data->init ||
!(driver_data->setpolicy || driver_data->target_index ||
driver_data->target) ||
(driver_data->setpolicy && (driver_data->target_index ||
driver_data->target)) ||
(!!driver_data->get_intermediate != !!driver_data->target_intermediate))
return -EINVAL;
pr_debug("trying to register driver %s
", driver_data->name);
/* Protect against concurrent CPU online/offline. */
get_online_cpus();
write_lock_irqsave(&cpufreq_driver_lock, flags);
if (cpufreq_driver) {
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
ret = -EEXIST;
goto out;
}
cpufreq_driver = driver_data;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
if (driver_data->setpolicy)
driver_data->flags |= CPUFREQ_CONST_LOOPS;
if (cpufreq_boost_supported()) {
ret = create_boost_sysfs_file();
if (ret)
goto err_null_driver;
}
ret = subsys_interface_register(&cpufreq_interface);
if (ret)
goto err_boost_unreg;
if (!(cpufreq_driver->flags & CPUFREQ_STICKY) &&
list_empty(&cpufreq_policy_list)) {
/* if all ->init() calls failed, unregister */
pr_debug("%s: No CPU initialized for driver %s
", __func__,
driver_data->name);
goto err_if_unreg;
}
register_hotcpu_notifier(&cpufreq_cpu_notifier);
pr_debug("driver %s up and running
", driver_data->name);
goto out;
err_if_unreg:
subsys_interface_unregister(&cpufreq_interface);
err_boost_unreg:
remove_boost_sysfs_file();
err_null_driver:
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
out:
put_online_cpus();
return ret;
}
一个SoC的CPUFreq驱动实例drivers/cpufreq/s3c64xx-cpufreq.c的核心结构代码如下:
/*
* Copyright 2009 Wolfson Microelectronics plc
*
* S3C64xx CPUfreq Support
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/#define pr_fmt(fmt) "cpufreq: " fmt#include #include #include #include #include #include #include #include staticstruct regulator *vddarm;
staticunsignedlong regulator_latency;
#ifdef CONFIG_CPU_S3C6410
/*
 * Voltage window for one DVFS level; both bounds are handed unchanged to
 * regulator_set_voltage() for the VDDARM supply.
 */
struct s3c64xx_dvfs {
	unsigned int vddarm_min;
	unsigned int vddarm_max;
};

/* DVFS levels, indexed by the driver_data field of the frequency table. */
static struct s3c64xx_dvfs s3c64xx_dvfs_table[] = {
	[0] = { 1000000, 1150000 },
	[1] = { 1050000, 1150000 },
	[2] = { 1100000, 1150000 },
	[3] = { 1200000, 1350000 },
	[4] = { 1300000, 1350000 },
};

/*
 * Candidate CPU frequencies in kHz. The second field (driver_data) selects
 * the s3c64xx_dvfs_table[] entry used for that frequency. Entries may be
 * overwritten with CPUFREQ_ENTRY_INVALID during driver initialization.
 */
static struct cpufreq_frequency_table s3c64xx_freq_table[] = {
	{ 0, 0,  66000 },
	{ 0, 0, 100000 },
	{ 0, 0, 133000 },
	{ 0, 1, 200000 },
	{ 0, 1, 222000 },
	{ 0, 1, 266000 },
	{ 0, 2, 333000 },
	{ 0, 2, 400000 },
	{ 0, 2, 532000 },
	{ 0, 2, 533000 },
	{ 0, 3, 667000 },
	{ 0, 4, 800000 },
	{ 0, 0, CPUFREQ_TABLE_END },
};
#endifstaticint s3c64xx_cpufreq_set_target(struct cpufreq_policy *policy,
unsignedint index)
{
struct s3c64xx_dvfs *dvfs;
unsignedint old_freq, new_freq;
int ret;
old_freq = clk_get_rate(policy->clk) / 1000;
new_freq = s3c64xx_freq_table[index].frequency;
dvfs = &s3c64xx_dvfs_table[s3c64xx_freq_table[index].driver_data];
#ifdef CONFIG_REGULATORif (vddarm && new_freq > old_freq) {
ret = regulator_set_voltage(vddarm,
dvfs->vddarm_min,
dvfs->vddarm_max);
if (ret != 0) {
pr_err("Failed to set VDDARM for %dkHz: %d
",
new_freq, ret);
return ret;
}
}
#endif
ret = clk_set_rate(policy->clk, new_freq * 1000);
if (ret < 0) {
pr_err("Failed to set rate %dkHz: %d
",
new_freq, ret);
return ret;
}
#ifdef CONFIG_REGULATORif (vddarm && new_freq < old_freq) {
ret = regulator_set_voltage(vddarm,
dvfs->vddarm_min,
dvfs->vddarm_max);
if (ret != 0) {
pr_err("Failed to set VDDARM for %dkHz: %d
",
new_freq, ret);
if (clk_set_rate(policy->clk, old_freq * 1000) < 0)
pr_err("Failed to restore original clock rate
");
return ret;
}
}
#endif
pr_debug("Set actual frequency %lukHz
",
clk_get_rate(policy->clk) / 1000);
return0;
}
#ifdef CONFIG_REGULATOR
/*
 * s3c64xx_cpufreq_config_regulator - drop frequencies VDDARM cannot supply
 *
 * For every valid frequency-table entry, checks whether at least one of the
 * voltages listed by the vddarm regulator falls inside the entry's DVFS
 * window; entries without a match are marked CPUFREQ_ENTRY_INVALID. Always
 * finishes by setting regulator_latency.
 */
static void __init s3c64xx_cpufreq_config_regulator(void)
{
	int count, v, i, found;
	struct cpufreq_frequency_table *freq;
	struct s3c64xx_dvfs *dvfs;

	count = regulator_count_voltages(vddarm);
	if (count < 0) {
		/*
		 * NOTE(review): there is no early exit here, so with a
		 * negative count the inner loop below never runs and every
		 * valid entry ends up marked invalid - confirm this is the
		 * intended outcome.
		 */
		pr_err("Unable to check supported voltages\n");
	}

	/* Nothing to compare against: leave the table untouched. */
	if (!count)
		goto out;

	cpufreq_for_each_valid_entry(freq, s3c64xx_freq_table) {
		dvfs = &s3c64xx_dvfs_table[freq->driver_data];
		found = 0;

		for (i = 0; i < count; i++) {
			v = regulator_list_voltage(vddarm, i);
			if (v >= dvfs->vddarm_min && v <= dvfs->vddarm_max)
				found = 1;
		}

		if (!found) {
			pr_debug("%dkHz unsupported by regulator\n",
				 freq->frequency);
			freq->frequency = CPUFREQ_ENTRY_INVALID;
		}
	}

out:
	/* Guess based on having to do an I2C/SPI write; in future we
	 * will be able to query the regulator performance here. */
	regulator_latency = 1 * 1000 * 1000;
}
#endifstaticint s3c64xx_cpufreq_driver_init(struct cpufreq_policy *policy)
{
int ret;
struct cpufreq_frequency_table *freq;
if (policy->cpu != 0)
return -EINVAL;
if (s3c64xx_freq_table == NULL) {
pr_err("No frequency information for this CPU
");
return -ENODEV;
}
policy->clk = clk_get(NULL, "armclk");
if (IS_ERR(policy->clk)) {
pr_err("Unable to obtain ARMCLK: %ld
",
PTR_ERR(policy->clk));
return PTR_ERR(policy->clk);
}
#ifdef CONFIG_REGULATOR
vddarm = regulator_get(NULL, "vddarm");
if (IS_ERR(vddarm)) {
ret = PTR_ERR(vddarm);
pr_err("Failed to obtain VDDARM: %d
", ret);
pr_err("Only frequency scaling available
");
vddarm = NULL;
} else {
s3c64xx_cpufreq_config_regulator();
}
#endif
cpufreq_for_each_entry(freq, s3c64xx_freq_table) {
unsignedlong r;
/* Check for frequencies we can generate */
r = clk_round_rate(policy->clk, freq->frequency * 1000);
r /= 1000;
if (r != freq->frequency) {
pr_debug("%dkHz unsupported by clock
",
freq->frequency);
freq->frequency = CPUFREQ_ENTRY_INVALID;
}
/* If we have no regulator then assume startup
* frequency is the maximum we can support. */if (!vddarm && freq->frequency > clk_get_rate(policy->clk) / 1000)
freq->frequency = CPUFREQ_ENTRY_INVALID;
}
/* Datasheet says PLL stabalisation time (if we were to use
* the PLLs, which we don't currently) is ~300us worst case,
* but add some fudge.
*/
ret = cpufreq_generic_init(policy, s3c64xx_freq_table,
(500 * 1000) + regulator_latency);
if (ret != 0) {
pr_err("Failed to configure frequency table: %d
",
ret);
regulator_put(vddarm);
clk_put(policy->clk);
}
return ret;
}
staticstruct cpufreq_driver s3c64xx_cpufreq_driver = {
.flags = CPUFREQ_NEED_INITIAL_FREQ_CHECK,
.verify = cpufreq_generic_frequency_table_verify,
.target_index = s3c64xx_cpufreq_set_target,
.get = cpufreq_generic_get,
.init = s3c64xx_cpufreq_driver_init,
.name = "s3c",
};
/*
 * Module entry point: registers the S3C64xx driver with the CPUFreq core
 * and returns whatever cpufreq_register_driver() reports.
 */
static int __init s3c64xx_cpufreq_init(void)
{
	return cpufreq_register_driver(&s3c64xx_cpufreq_driver);
}
module_init(s3c64xx_cpufreq_init);
CPUFreq的策略
系统的状态以及CPUFreq的策略共同决定了CPU频率跳变的目标,CPUFreq核心层将目标频率传递给底层具体SoC的CPUFreq驱动,该驱动修改硬件,完成频率的变化。如下图所示:
用户空间一般可通过/sys/devices/system/cpu/cpux/cpufreq节点来设置CPUFreq,例如,设置CPUFreq到700MHz,采用userspace策略,则运行如下命令:
# echo userspace > /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor
# echo 700000 > /sys/devices/system/cpu/cpu0/cpufreq/scaling_setspeed
CPUIdle驱动
目前的ARM SoC大多支持几个不同的Idle级别,CPUIdle驱动子系统存在的目的就是对这些Idle状态进行管理,并根据系统运行的情况进入不同的Idle级别。
与CPUFreq类似,CPUIdle的核心层提供了用于注册cpuidle_driver的函数:
int cpuidle_register_driver(struct cpuidle_driver *drv)
cpuidle_device注册函数:
int cpuidle_register_device(struct cpuidle_device *dev)
cpuidle_driver结构体是CPUIdle驱动的主体
/****************************
* CPUIDLE DRIVER INTERFACE *
****************************/
/*
 * Description of one CPUIdle driver: its identity, the idle states it
 * offers and the CPUs it handles.
 */
struct cpuidle_driver {
	const char		*name;
	struct module		*owner;
	int			refcnt;

	/* used by the cpuidle framework to setup the broadcast timer */
	unsigned int		bctimer:1;
	/* states array must be ordered in decreasing power consumption */
	struct cpuidle_state	states[CPUIDLE_STATE_MAX];
	int			state_count;
	int			safe_state_index;

	/* the driver handles the cpus in cpumask */
	struct cpumask		*cpumask;
};
其中关键成员是cpuidle_state结构体数组states[CPUIDLE_STATE_MAX],该数组用于存储各种不同Idle级别的信息,cpuidle_state结构体定义如下:
/*
 * Description of a single idle state: its name, its latency and residency
 * figures, and the callbacks used to enter it.
 */
struct cpuidle_state {
	char		name[CPUIDLE_NAME_LEN];
	char		desc[CPUIDLE_DESC_LEN];

	unsigned int	flags;
	unsigned int	exit_latency; /* in US */
	int		power_usage; /* in mW */
	unsigned int	target_residency; /* in US */
	bool		disabled; /* disabled on all CPUs */

	int (*enter)	(struct cpuidle_device *dev,
			struct cpuidle_driver *drv,
			int index);

	int (*enter_dead) (struct cpuidle_device *dev, int index);

	/*
	 * CPUs execute ->enter_freeze with the local tick or entire timekeeping
	 * suspended, so it must not re-enable interrupts at any point (even
	 * temporarily) or attempt to change states of clock event devices.
	 */
	void (*enter_freeze) (struct cpuidle_device *dev,
			      struct cpuidle_driver *drv,
			      int index);
};
一个具体的SoC驱动实例,drivers/cpuidle/cpuidle-ux500.c,它有两个Idle级别,即WFI和ApIdle,具体代码如下:
/*
* Copyright (c) 2012 Linaro : Daniel Lezcano (IBM)
*
* Based onthe work of Rickard Andersson
* and Jonas Aaberg .
*
* This program is free software; you can redistribute itand/or modify
* it under the terms ofthe GNU General Public License version2as
* published bythe Free Software Foundation.
*/
#include #include #include #include #include #include #include #include #include
static atomic_t master = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(master_lock);
/*
 * ux500_enter_idle - ->enter() callback of the "ApIdle" state
 * @dev: cpuidle device (not used here)
 * @drv: cpuidle driver (not used here)
 * @index: index of the requested idle state
 *
 * Every CPU that enters bumps the 'master' counter. The CPU that brings the
 * counter up to num_online_cpus() tries to act as master: it decouples the
 * GIC, checks that the other CPU is in WFI and that no interrupt is
 * pending, and asks the PRCMU for PRCMU_AP_IDLE. All other CPUs, and the
 * master once the request succeeded, simply execute cpu_do_idle().
 *
 * Returns @index unchanged on every path.
 */
static inline int ux500_enter_idle(struct cpuidle_device *dev,
				   struct cpuidle_driver *drv, int index)
{
	int this_cpu = smp_processor_id();
	/* True while this CPU has decoupled the GIC and must undo it itself. */
	bool recouple = false;

	if (atomic_inc_return(&master) == num_online_cpus()) {

		/* With this lock, we prevent the other cpu to exit and enter
		 * this function again and become the master */
		if (!spin_trylock(&master_lock))
			goto wfi;

		/* decouple the gic from the A9 cores */
		if (prcmu_gic_decouple()) {
			spin_unlock(&master_lock);
			goto out;
		}

		/* If an error occur, we will have to recouple the gic
		 * manually */
		recouple = true;

		/* At this state, as the gic is decoupled, if the other
		 * cpu is in WFI, we have the guarantee it won't be wake
		 * up, so we can safely go to retention */
		/* NOTE(review): 'this_cpu ? 0 : 1' assumes exactly two CPUs
		 * numbered 0 and 1 - confirm against the platform. */
		if (!prcmu_is_cpu_in_wfi(this_cpu ? 0 : 1))
			goto out;

		/* The prcmu will be in charge of watching the interrupts
		 * and wake up the cpus */
		if (prcmu_copy_gic_settings())
			goto out;

		/* Check in the meantime an interrupt did
		 * not occur on the gic ... */
		if (prcmu_gic_pending_irq())
			goto out;

		/* ... and the prcmu */
		if (prcmu_pending_irq())
			goto out;

		/* Go to the retention state, the prcmu will wait for the
		 * cpu to go WFI and this is what happens after exiting this
		 * 'master' critical section */
		if (prcmu_set_power_state(PRCMU_AP_IDLE, true, true))
			goto out;

		/* When we switch to retention, the prcmu is in charge
		 * of recoupling the gic automatically */
		recouple = false;

		spin_unlock(&master_lock);
	}
wfi:
	cpu_do_idle();
out:
	atomic_dec(&master);

	/* Bailed out with the GIC still decoupled: recouple it and release
	 * the lock that is still held from the master section. */
	if (recouple) {
		prcmu_gic_recouple();
		spin_unlock(&master_lock);
	}

	return index;
}
/*
 * CPUIdle driver for the ux500 with two idle states: the generic ARM WFI
 * state at index 0 (also the safe state) and "ApIdle" at index 1, which is
 * entered through ux500_enter_idle().
 */
static struct cpuidle_driver ux500_idle_driver = {
	.owner = THIS_MODULE,
	.name = "ux500_idle",
	.state_count = 2,
	.safe_state_index = 0,
	.states = {
		ARM_CPUIDLE_WFI_STATE,
		{
			.name		  = "ApIdle",
			.desc		  = "ARM Retention",
			.flags		  = CPUIDLE_FLAG_TIMER_STOP,
			.exit_latency	  = 70,
			.target_residency = 260,
			.enter		  = ux500_enter_idle,
		},
	},
};
/*
 * Platform-driver probe: enables the ARM, RTC and ABB wake-up sources in
 * the PRCMU, then registers the ux500 cpuidle driver. Returns the result of
 * cpuidle_register().
 */
static int dbx500_cpuidle_probe(struct platform_device *pdev)
{
	int ret;

	/* Configure wake up reasons */
	prcmu_enable_wakeups(PRCMU_WAKEUP(ARM) |
			     PRCMU_WAKEUP(RTC) |
			     PRCMU_WAKEUP(ABB));

	ret = cpuidle_register(&ux500_idle_driver, NULL);

	return ret;
}
/* Platform driver named "cpuidle-dbx500"; probing it sets up cpuidle. */
static struct platform_driver dbx500_cpuidle_plat_driver = {
	.probe	= dbx500_cpuidle_probe,
	.driver	= {
		.name = "cpuidle-dbx500",
	},
};

/* Registered as a built-in (non-modular) platform driver. */
builtin_platform_driver(dbx500_cpuidle_plat_driver);
综上分析,可以给出 Linux CPUIdle子系统总体架构,如下图: