cpu_ops、cpu_psci_ops、psci_ops、suspend_ops以及arm_idle_driver

在内核中针对cpu的操作,比如arm_cpuidle_init、arm_cpuidle_suspend、boot_secondary、secondary_start_kernel、op_cpu_disable、op_cpu_kill、cpu_die、smp_cpu_setup、smp_prepare_cpus,最终都会落到对cpu_ops的调用上。

cpu_ops将针对底层cpu的操作抽象为一系列回调函数,以统一的形式向上层提供API。

cpu_psci_ops作为cpu_ops的一个特殊实现,将cpu_ops关联到PSCI的psci_ops。

psci_ops的函数在PSCI Firmware中实现,提供一系列基于Function ID的调用。

这种分层思想将内核通用cpu_operations和硬件相关部分分隔开。

cpu_operations及其应用

首先分析一下cpu_operations这个结构体:


/*
 * struct cpu_operations - callbacks abstracting the low-level handling of
 * one CPU, so that upper layers can drive every CPU through a uniform API.
 *
 * The hotplug callbacks only exist with CONFIG_HOTPLUG_CPU and the idle
 * callbacks only exist with CONFIG_CPU_IDLE.
 */
struct cpu_operations {
    /* Name of this implementation. */
    const char  *name;
    /* Read the data needed to prepare for initialisation. */
    int         (*cpu_init)(unsigned int);
    /* Preparation work done before booting the CPU. */
    int         (*cpu_prepare)(unsigned int);
    /* Boot a CPU. */
    int         (*cpu_boot)(unsigned int);
    /* Clean-up work performed after boot. */
    void        (*cpu_postboot)(void);
#ifdef CONFIG_HOTPLUG_CPU
    /* Preparation work done before shutting the CPU down. */
    int         (*cpu_disable)(unsigned int cpu);
    /* Shut the CPU down. */
    void        (*cpu_die)(unsigned int cpu);
    /* Confirm whether the CPU has been shut down. */
    int         (*cpu_kill)(unsigned int cpu);
#endif
#ifdef CONFIG_CPU_IDLE
    /* Read the CPU idle state parameters. */
    int         (*cpu_init_idle)(unsigned int);
    /* Suspend a CPU, saving its context. */
    int         (*cpu_suspend)(unsigned long);
#endif
};

cpu_init


/*
 * Look up the cpu_ops for @cpu, run its cpu_init callback and, when both
 * steps succeed, mark the CPU as possible.
 *
 * Returns 0 on success, or -ENODEV if either the lookup or cpu_init fails.
 */
static int __init smp_cpu_setup(int cpu)
{
    /* Short-circuit: cpu_init is only invoked once the ops lookup worked. */
    if (cpu_read_ops(cpu) || cpu_ops[cpu]->cpu_init(cpu))
        return -ENODEV;

    set_cpu_possible(cpu, true);
    return 0;
}

获取指定cpu的cpu_ops,执行cpu_init回调函数进行初始化。并将此cpu设置为possible。

cpu_prepare


/*
 * Prepare up to @max_cpus CPUs for bring-up: for every possible CPU other
 * than the current one, run its cpu_prepare callback and mark it present
 * when that callback succeeds.
 */
void __init smp_prepare_cpus(unsigned int max_cpus)
{
    int err;
    unsigned int cpu, ncores = num_possible_cpus();

    /* Fill in the cpu_topology structure array. */
    init_cpu_topology();

    smp_store_cpu_info(smp_processor_id());

    /*
     * are we trying to boot more cores than exist?
     * (never exceed the number of possible CPUs)
     */
    if (max_cpus > ncores)
        max_cpus = ncores;

    /* Don't bother if we're effectively UP */
    if (max_cpus <= 1)
        return;

    /*
     * Initialise the present map (which describes the set of CPUs
     * actually populated at the present time) and release the
     * secondaries from the bootloader.
     *
     * Make sure we online at most (max_cpus - 1) additional CPUs.
     */
    max_cpus--;
    for_each_possible_cpu(cpu) {
        if (max_cpus == 0)
            break;

        if (cpu == smp_processor_id())
            continue;

        if (!cpu_ops[cpu])
            continue;

        /* Run the .cpu_prepare callback; on success the CPU becomes present. */
        err = cpu_ops[cpu]->cpu_prepare(cpu);
        if (err)
            continue;

        set_cpu_present(cpu, true);
        max_cpus--;
    }
}

cpu_boot


/*
 * Boot secondary CPU @cpu through its cpu_boot callback.
 *
 * Returns the callback's result, or -EOPNOTSUPP when the CPU's ops provide
 * no cpu_boot method. @idle is not used here.
 */
static int boot_secondary(unsigned int cpu, struct task_struct *idle)
{
    if (!cpu_ops[cpu]->cpu_boot)
        return -EOPNOTSUPP;

    return cpu_ops[cpu]->cpu_boot(cpu);
}

cpu_postboot


/*
 * C entry point reached by a secondary CPU (branched to from
 * __secondary_switched). It adopts init_mm, runs the optional cpu_postboot
 * hook, marks the CPU online, signals cpu_running, enables
 * debug/IRQ/async exceptions and finally enters cpu_startup_entry().
 */
asmlinkage void secondary_start_kernel(void)
{
    struct mm_struct *mm = &init_mm;
    unsigned int cpu = smp_processor_id();

/*
     * All kernel threads share the same mm context; grab a
     * reference and switch to it.
     */
    atomic_inc(&mm->mm_count);
    current->active_mm = mm;

set_my_cpu_offset(per_cpu_offset(smp_processor_id()));

/*
     * TTBR0 is only used for the identity mapping at this stage. Make it
     * point to zero page to avoid speculatively fetching new entries.
     */
    cpu_set_reserved_ttbr0();
    local_flush_tlb_all();
    cpu_set_default_tcr_t0sz();

preempt_disable();
    trace_hardirqs_off();

/*
     * If the system has established the capabilities, make sure
     * this CPU ticks all of those. If it doesn't, the CPU will
     * fail to come online.
     */
    verify_local_cpu_capabilities();

/* The post-boot hook is optional; call it only when the ops provide one. */
if (cpu_ops[cpu]->cpu_postboot)
        cpu_ops[cpu]->cpu_postboot();

/*
     * Log the CPU info before it is marked online and might get read.
     */
    cpuinfo_store_cpu();

/*
     * Enable GIC and timers.
     */
    notify_cpu_starting(cpu);

smp_store_cpu_info(cpu);

/*
     * OK, now it's safe to let the boot CPU continue.  Wait for
     * the CPU migration code to notice that the CPU is online
     * before we continue.
     */
    pr_info("CPU%u: Booted secondary processor [%08x]\n",
                     cpu, read_cpuid_id());
    set_cpu_online(cpu, true);
    complete(&cpu_running);

local_dbg_enable();
    local_irq_enable();
    local_async_enable();

/*
     * OK, it's off to the idle thread for us
     */
    cpu_startup_entry(CPUHP_ONLINE);
}

cpu_disable


/*
 * Ask the CPU's ops whether @cpu may be taken down.
 *
 * Returns -EOPNOTSUPP when the CPU has no ops or no cpu_die method (CPU0 may
 * have no cpu_ops at all), so the unplug is refused before the point of no
 * return. Otherwise returns the result of the optional cpu_disable callback,
 * which may veto the unplug for a mechanism-specific reason, or 0 when no
 * such callback exists.
 */
static int op_cpu_disable(unsigned int cpu)
{
    if (cpu_ops[cpu] && cpu_ops[cpu]->cpu_die) {
        if (!cpu_ops[cpu]->cpu_disable)
            return 0;

        return cpu_ops[cpu]->cpu_disable(cpu);
    }

    return -EOPNOTSUPP;
}

cpu_die


/*
 * Executed on the CPU that is going offline: leave the idle task, disable
 * interrupts, report the death and then invoke the cpu_die callback of this
 * CPU's ops. The callback is not expected to return; BUG() fires if it does.
 */
void cpu_die(void)
{
    unsigned int cpu = smp_processor_id();

idle_task_exit();

local_irq_disable();

/* Tell __cpu_die() that this CPU is now safe to dispose of */
    (void)cpu_report_death();

/*
     * Actually shutdown the CPU. This must never fail. The specific hotplug
     * mechanism must perform all required cache maintenance to ensure that
     * no dirty lines are lost in the process of shutting down the CPU.
     */
    cpu_ops[cpu]->cpu_die(cpu);

/* Reaching this point means cpu_die returned, which must never happen. */
BUG();
}

cpu_kill


/*
 * Confirm that @cpu is dead by way of the cpu_kill callback.
 *
 * Without a cpu_kill method there is no means of synchronising with the
 * dying CPU; rather than wait an arbitrary time, assume it is dead and
 * return 0.
 */
static int op_cpu_kill(unsigned int cpu)
{
    if (cpu_ops[cpu]->cpu_kill)
        return cpu_ops[cpu]->cpu_kill(cpu);

    return 0;
}

cpu_init_idle


/*
 * Initialise CPU idle support for @cpu through the cpu_init_idle callback.
 *
 * Returns the callback's result, or -EOPNOTSUPP when the CPU has no ops or
 * the ops carry no cpu_init_idle method.
 */
int __init arm_cpuidle_init(unsigned int cpu)
{
    if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_init_idle)
        return -EOPNOTSUPP;

    return cpu_ops[cpu]->cpu_init_idle(cpu);
}

cpu_suspend


/*
 * Enter idle state @index on the calling CPU via the cpu_suspend callback.
 *
 * Fails early with -EOPNOTSUPP when no cpu_ops are registered for this CPU
 * or suspend has not been initialised.
 */
int arm_cpuidle_suspend(int index)
{
    int this_cpu = smp_processor_id();

    if (cpu_ops[this_cpu] && cpu_ops[this_cpu]->cpu_suspend)
        return cpu_ops[this_cpu]->cpu_suspend(index);

    return -EOPNOTSUPP;
}

cpu_ops到arch-dependent的关联

以start_kernel为起点,查看从内核开始到获取cpu_ops的路径如下:

start_kernel
    -->setup_arch
        -->cpu_read_bootcpu_ops  只获取bootcpu的cpu_ops
            -->cpu_read_ops(0)
        -->smp_init_cpus  获取nonboot cpu的cpu_ops
            -->smp_cpu_setup
                -->cpu_read_ops

cpu_read_ops是获取cpu_ops的关键,参数是cpu的序列号,输出是cpu_ops[cpu]。


/*
 * Resolve and record the cpu_operations for @cpu in cpu_ops[cpu].
 *
 * Returns 0 on success, -ENODEV when no enable-method can be read for the
 * CPU, or -EOPNOTSUPP when the enable-method matches no supported ops.
 */
int __init cpu_read_ops(int cpu)
{
    /* Fetch the enable-method string from the device tree. */
    const char *enable_method = cpu_read_enable_method(cpu);

    if (!enable_method)
        return -ENODEV;

    /* Look the string up in supported_cpu_ops to get the ops pointer. */
    cpu_ops[cpu] = cpu_get_ops(enable_method);
    if (!cpu_ops[cpu]) {
        pr_warn("Unsupported enable-method: %s\n", enable_method);
        return -EOPNOTSUPP;
    }

    return 0;
}

通过cpu0的DeviceTree可以看出enable-method为psci。

支持的cpu_operations有:


/*
 * NULL-terminated table of the cpu_operations implementations available:
 * the spin-table ops and the PSCI ops.
 */
static const struct cpu_operations *supported_cpu_ops[] __initconst = {
    &smp_spin_table_ops,
    &cpu_psci_ops,
    NULL,
};

所以cpu_ops=&cpu_psci_ops。

cpu_psci_ops分析

cpu_psci_ops结构体可以说是cpu_operations和psci_operations的桥梁,它将cpu_operations的一系列回调函数映射到psci_operations。


const struct cpu_operations cpu_psci_ops = {
    .name        = "psci",
#ifdef CONFIG_CPU_IDLE
    .cpu_init_idle    = cpu_psci_cpu_init_idle,  从DeviceTree获取CPU idle状态数据
    .cpu_suspend    = cpu_psci_cpu_suspend,  根据是否丢失上下文来选择是psci_ops.cpu_suspend还是cpu_suspend
#endif
    .cpu_init    = cpu_psci_cpu_init,  为空
    .cpu_prepare    = cpu_psci_cpu_prepare,  只是判断psci_ops.cpu_on是否存在,不存在则返回错误。
    .cpu_boot    = cpu_psci_cpu_boot,  调用psci_ops.cpu_on
#ifdef CONFIG_HOTPLUG_CPU
    .cpu_disable    = cpu_psci_cpu_disable,  检查是否支持psci_ops.cpu_off。
    .cpu_die    = cpu_psci_cpu_die,  调用psci_ops.cpu_off
    .cpu_kill    = cpu_psci_cpu_kill,  检查指定cpu是否已经被kill
#endif
}

cpu_psci_cpu_boot


/*
 * Power on @cpu through psci_ops.cpu_on, passing the physical address of
 * secondary_entry as the entry point.
 *
 * Returns the cpu_on result; a non-zero result is also logged.
 */
static int cpu_psci_cpu_boot(unsigned int cpu)
{
    int rc;

    rc = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry));
    if (rc != 0)
        pr_err("failed to boot CPU%d (%d)\n", cpu, rc);

    return rc;
}

CPU_ON用于secondary boot、hotplug或者big.LITTLE迁移。如果需要从一个核启动另一个核,通过CPU_ON提供一个入口地址和上下文标识。

PSCI执行必要的操作来启动一个核,使其从提供的入口地址开始执行,上下文标识会存放在R0或者W0中。这里的入口地址就对应secondary_entry。

在arch/arm64/kernel/head.S中:

secondary_entry—>secondary_startup—>__secondary_switched—>secondary_start_kernel

/*
 * Entry address handed to psci_ops.cpu_on: run el2_setup and
 * set_cpu_boot_mode_flag, then continue in secondary_startup.
 */
ENTRY(secondary_entry)
    bl    el2_setup            // Drop to EL1
    bl    set_cpu_boot_mode_flag
    b    secondary_startup
ENDPROC(secondary_entry)

/*
 * Load the page-directory addresses into x25/x26, initialise the processor,
 * then branch to __enable_mmu with x27 holding the address to continue at
 * (__secondary_switched) and x21 holding the address of secondary_data.
 */
ENTRY(secondary_startup)
    /*
     * Common entry point for secondary CPUs.
     */
    adrp    x25, idmap_pg_dir
    adrp    x26, swapper_pg_dir
    bl    __cpu_setup            // initialise processor

ldr    x21, =secondary_data
    ldr    x27, =__secondary_switched    // address to jump to after enabling the MMU
    b    __enable_mmu
ENDPROC(secondary_startup)

/*
 * Reached once the MMU is on: install the stack taken from secondary_data
 * (x21 was loaded in secondary_startup) and branch to the C entry point.
 */
ENTRY(__secondary_switched)
    ldr    x0, [x21]            // get secondary_data.stack
    mov    sp, x0
    mov    x29, #0              // clear x29 (presumably ends the frame-pointer chain - confirm)
    b    secondary_start_kernel
ENDPROC(__secondary_switched)

在secondary_start_kernel中,先调用.cpu_postboot回调函数进行boot后处理,再将CPU设置为online,最后通过cpu_startup_entry进入idle线程。

cpu_psci_cpu_init_idle


/*
 * Parse the "cpu-idle-states" phandles of @cpu's device-tree node, read and
 * validate each state's "arm,psci-suspend-param" value, and publish the
 * resulting array through the per-CPU psci_power_state pointer.
 *
 * Returns 0 on success; -ENODEV when the CPU node or its idle states are
 * missing; -EOPNOTSUPP when psci_ops.cpu_suspend is not set; -ENOMEM on
 * allocation failure; -EINVAL for an invalid power state; or the error from
 * of_property_read_u32() when the suspend parameter is missing.
 *
 * The reference taken on the CPU node by of_get_cpu_node() is dropped on
 * every exit path.
 */
static int __maybe_unused cpu_psci_cpu_init_idle(unsigned int cpu)
{
    int i, ret, count = 0;
    u32 *psci_states;
    struct device_node *state_node, *cpu_node;

    cpu_node = of_get_cpu_node(cpu, NULL);
    if (!cpu_node)
        return -ENODEV;

    /*
     * If the PSCI cpu_suspend function hook has not been initialized
     * idle states must not be enabled, so bail out
     */
    if (!psci_ops.cpu_suspend) {
        ret = -EOPNOTSUPP;
        goto put_cpu_node;
    }

    /* Count idle states */
    while ((state_node = of_parse_phandle(cpu_node, "cpu-idle-states",
                          count))) {
        count++;
        of_node_put(state_node);
    }

    if (!count) {
        ret = -ENODEV;
        goto put_cpu_node;
    }

    psci_states = kcalloc(count, sizeof(*psci_states), GFP_KERNEL);
    if (!psci_states) {
        ret = -ENOMEM;
        goto put_cpu_node;
    }

    for (i = 0; i < count; i++) {
        u32 state;

        state_node = of_parse_phandle(cpu_node, "cpu-idle-states", i);

        ret = of_property_read_u32(state_node,
                       "arm,psci-suspend-param",
                       &state);
        if (ret) {
            pr_warn(" * %s missing arm,psci-suspend-param property\n",
                state_node->full_name);
            of_node_put(state_node);
            goto free_mem;
        }

        of_node_put(state_node);
        pr_debug("psci-power-state %#x index %d\n", state, i);
        if (!psci_power_state_is_valid(state)) {
            pr_warn("Invalid PSCI power state %#x\n", state);
            ret = -EINVAL;
            goto free_mem;
        }
        psci_states[i] = state;
    }

    /* Idle states parsed correctly, initialize per-cpu pointer */
    per_cpu(psci_power_state, cpu) = psci_states;
    of_node_put(cpu_node);
    return 0;

free_mem:
    kfree(psci_states);
put_cpu_node:
    /* Balance the reference obtained from of_get_cpu_node(). */
    of_node_put(cpu_node);
    return ret;
}

1.解析DeviceTree中cpu下的cpu-idle-states属性

2.从每个state中获取arm,psci-suspend-param的参数,并验证是否有效。

3.初始化per-CPU类型的指针psci_power_state。

cpu_psci_cpu_suspend


/*
 * Enter idle state @index on the calling CPU.
 *
 * The PSCI parameter for @index lives at slot (index - 1) of this CPU's
 * psci_power_state array. States that keep context call
 * psci_ops.cpu_suspend directly; states that lose context go through
 * cpu_suspend() with psci_suspend_finisher.
 *
 * Returns -EINVAL (with a one-time warning) for index 0, otherwise the
 * result of the suspend call.
 */
static int __maybe_unused cpu_psci_cpu_suspend(unsigned long index)
{
    int ret;
    /* Read this CPU's suspend state parameters from psci_power_state. */
    u32 *state = __this_cpu_read(psci_power_state);

    /*
     * idle state index 0 corresponds to wfi, should never be called
     * from the cpu_suspend operations
     */
    if (WARN_ON_ONCE(!index))
        return -EINVAL;

    if (!psci_power_state_loses_context(state[index - 1]))
        ret = psci_ops.cpu_suspend(state[index - 1], 0);
    else
        ret = cpu_suspend(index, psci_suspend_finisher);

    return ret;
}

psci_ops

由于acpi_disabled,所以psci通过DeviceTree获取相关参数。

start_kernel
    -->setup_arch
        -->psci_dt_init  这个函数在cpu_ops之前,因为cpu_ops依赖psci_ops

psci有不同版本,需要通过DeviceTree获取版本信息和使用的method(是smc还是hvc)。

通过查看DeviceTree可以看到对应的是psci_0_2_init。


/*
 * Device-tree match table: maps each supported PSCI "compatible" string to
 * the init function stored in .data. Both "arm,psci-0.2" and "arm,psci-1.0"
 * use psci_0_2_init. The empty entry terminates the table.
 */
static const struct of_device_id psci_of_match[] __initconst = {
    { .compatible = "arm,psci",    .data = psci_0_1_init},
    { .compatible = "arm,psci-0.2",    .data = psci_0_2_init},
    { .compatible = "arm,psci-1.0",    .data = psci_0_2_init},
    {},
};

psci_dt_init解析DeviceTree执行对应psci版本的初始化函数。


/*
 * Find the PSCI node in the device tree and run the init function attached
 * to the matching psci_of_match entry.
 *
 * Returns -ENODEV when no matching node exists, otherwise the init
 * function's result.
 */
int __init psci_dt_init(void)
{
    const struct of_device_id *matched_np;
    struct device_node *np;

    np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np);
    if (np == NULL)
        return -ENODEV;

    /* .data holds the version-specific init routine for this match. */
    return ((psci_initcall_t)matched_np->data)(np);
}

psci_0_2_init设置method,然后调用psci_probe:


/*
 * Init routine for PSCI v0.2 and later nodes: select the conduit method
 * from @np, then probe the firmware.
 *
 * Returns the first error encountered, or the psci_probe() result. The
 * reference on @np is dropped before returning in every case.
 */
static int __init psci_0_2_init(struct device_node *np)
{
    int err;

    /*
     * For the platform discussed here the device tree selects SMC, so
     * invoke_psci_fn = __invoke_psci_fn_smc.
     */
    err = get_set_conduit_method(np);

    if (err)
        goto out_put_node;
    /*
     * Starting with v0.2, the PSCI specification introduced a call
     * (PSCI_VERSION) that allows probing the firmware version, so
     * that PSCI function IDs and version specific initialization
     * can be carried out according to the specific version reported
     * by firmware
     */
    err = psci_probe();

out_put_node:
    of_node_put(np);
    return err;
}

psci_probe通过psci_0_2_set_functions设置PSCI 0.2及以上版本的回调函数,以及arm_pm_restart和pm_power_off。


/*
 * Install the standard PSCI v0.2 function IDs and point psci_ops, together
 * with the arm_pm_restart and pm_power_off hooks, at the PSCI
 * implementations.
 */
static void __init psci_0_2_set_functions(void)
{
    pr_info("Using standard PSCI v0.2 function IDs\n");

    /* Function ID table. */
    psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_FN_NATIVE(0_2, CPU_SUSPEND);
    psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF;
    psci_function_id[PSCI_FN_CPU_ON] = PSCI_FN_NATIVE(0_2, CPU_ON);
    psci_function_id[PSCI_FN_MIGRATE] = PSCI_FN_NATIVE(0_2, MIGRATE);

    /* psci_ops callbacks. */
    psci_ops.cpu_suspend = psci_cpu_suspend;
    psci_ops.cpu_off = psci_cpu_off;
    psci_ops.cpu_on = psci_cpu_on;
    psci_ops.migrate = psci_migrate;
    psci_ops.affinity_info = psci_affinity_info;
    psci_ops.migrate_info_type = psci_migrate_info_type;

    /* System restart and power-off hooks. */
    arm_pm_restart = psci_sys_reset;
    pm_power_off = psci_sys_poweroff;
}

这些函数有一个共性:最终都通过invoke_psci_fn发起调用,例如invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0);这里invoke_psci_fn指向__invoke_psci_fn_smc。

__invoke_psci_fn_smc指向arch/arm64/kernel/psci-call.S定义的函数:


/* int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, u64 arg2) */
/*
 * Issue "smc #0" and return. No register is touched around the instruction,
 * so the arguments reach the SMC in whatever registers the caller placed
 * them in.
 */
ENTRY(__invoke_psci_fn_smc)
    smc    #0
    ret
ENDPROC(__invoke_psci_fn_smc)

http://infocenter.arm.com/help/topic/com.arm.doc.den0022c/DEN0022C_Power_State_Coordination_Interface.pdf Chapter5有PSCI函数原型和相关参数、返回值的介绍。

第一个参数是Function ID,后面三个参数作为Function ID的参数。如果使用的是32位的调用(SMC32),Function ID和后三个参数通过r0-r3(AArch64下为W0-W3)传递,r0/W0存放返回值;如果使用64位的调用(SMC64),Function ID和后三个参数通过X0-X3传递,X0存放返回值。这些Function ID的实现在对应的Firmware中,但是可以通过上述pdf查看输入输出细节。

PSCI除了提供psci_ops的回调函数之外,还提供用于restart和power off的arch-dependent函数arm_pm_restart和pm_power_off。

比如machine_power_off和machine_restart调用:


/*
 * Power the machine off: disable local interrupts, stop the other CPUs and
 * call the pm_power_off hook when one is installed.
 */
void machine_power_off(void)
{
    local_irq_disable();
    smp_send_stop();

    if (!pm_power_off)
        return;

    pm_power_off();
}

/*
 * Restart the machine. Disables local interrupts and stops the other CPUs,
 * then tries in order: efi_reboot() when EFI runtime services are enabled,
 * the arm_pm_restart hook if set, otherwise do_kernel_restart(). If control
 * still returns, prints a failure message and spins forever.
 *
 * @cmd: passed through to arm_pm_restart() or do_kernel_restart().
 */
void machine_restart(char *cmd)
{
    /* Disable interrupts first */
    local_irq_disable();
    smp_send_stop();

/*
     * UpdateCapsule() depends on the system being reset via
     * ResetSystem().
     */
    if (efi_enabled(EFI_RUNTIME_SERVICES))
        efi_reboot(reboot_mode, NULL);

/* Now call the architecture specific reboot code. */
    if (arm_pm_restart)
        arm_pm_restart(reboot_mode, cmd);
    else
        do_kernel_restart(cmd);

/*
     * Whoops - the architecture was unable to reboot.
     */
    printk("Reboot failed -- System halted\n");
    while (1);
}

参考文档

Linux CPU core的电源管理(3)_cpu ops:http://www.wowotech.net/pm_subsystem/cpu_ops.html

时间: 2024-10-27 07:00:01

cpu_ops、cpu_psci_ops、psci_ops、suspend_ops以及arm_idle_driver的相关文章

Linux下Power Management开发总结

本文作为一个提纲挈领的介绍性文档,后面会以此展开,逐渐丰富. 关于Linux省电: 保持CPU处于工作状态时: 1. 设备使能RPM,不使用的设备动态关闭. 2. cpufreq动态调节CPU/GPU的频率电压,以达到降低功耗的目的. 3. cpuidle让CPU进入idle状态,或者深睡. 4. cpu hotplug对不使用的CPU进行动态热插拔. CPU进入睡眠状态: 1. suspend,如果整个系统无事可干进入suspend,包括两种suspend idle和suspend to ra

linux驱动程序之电源管理 之linux休眠与唤醒(2)

在Linux中,休眠主要分三个主要的步骤:(1)冻结用户态进程和内核态任务:(2)调用注册的设备的suspend的回调函数:(3)按照注册顺序休眠核心设备和使CPU进入休眠态.       冻结进程是内核把进程列表中所有的进程的状态都设置为停止,并且保存下所有进程的上下文.当这些进程被解冻的时候,他们是不知道自己被冻结过的,只是简单的继续执行.如何让Linux进入休眠呢?用户可以通过读写sys文件/sys /power/state 是实现控制系统进入休眠.比如: # echo standby >

linux驱动程序之电源管理之标准linux休眠和唤醒机制分析(二)

三.pm_test属性文件读写 int pm_test_level = TEST_NONE; static const char * const  pm_tests[__TEST_AFTER_LAST] = { [TEST_NONE] = "none", [TEST_CORE] = "core", [TEST_CPUS] = "processors", [TEST_PLATFORM] = "platform", [TEST_D

Linux Power Managment详解 【转】

转自:http://blog.chinaunix.net/uid-24517893-id-254740.html Linux Power Managment 谨以此文纪念过往的岁月 一.前言 在这个对节能要求越来越严格的年代,对设备的电源管理就显的很重要的了,尤其对于可移动设备,在电源有限的情况下,续航能力就显的很重要的.在本文中将介绍linux是如何对设备电源进行管理的. 二.睡眠 Linux的电源管理的主要几个文件集中在/kernel/power/main.c和/driver/base/po

android休眠唤醒驱动流程分析【转】

转自:http://blog.csdn.net/hanmengaidudu/article/details/11777501 标准linux休眠过程: l        power management notifiers are executed with PM_SUSPEND_PREPARE l        tasks are frozen l        target system sleep state is announced to the platform-handling co

基于wakeup_source的linux内核睡眠机制

一:wakeup_source简介: linux 3.4内核PM使用了wakeup_source来保持唤醒状态,也就是keep awake.之前android一直是基于Linux加入了wake_lock机制来阻止系统休眠,后来Linux 3.4内核加入了wakeup_source来管理,安卓4.4跟着升级内核也就摒弃了自己的繁杂的wake_lock机制,在对上层接口并不改变,在内核wake_lock实现直接基于wakeup_source来实现的.当然也会带来debug上的一些问题,比如以前的wa

Linux电源管理【转】

转自:http://www.cnblogs.com/sky-zhang/archive/2012/06/05/2536807.html PM notifier机制: 应用场景: There are some operations that subsystems or drivers may want to carry out before hibernation/suspend or after restore/resume, but they require the system to be

从sys/power/state分析并实现S3C2416的睡眠和唤醒

环境: PC: debian-7.6.0 ARM CPU: S3C2416 Linux-Kernel: 3.6.0(FriendlyARM) U-boot: 1.3.4 一.问题来源 根据需要,在S3C2416上添加中断睡眠和唤醒功能,于是我就查查Linux支持S3C2416的睡眠模式: cat /sys/power/state 执行完,万万没想到:竟然是空的,该命令没有任何输出!也就是说,我的内核目前不支持任何方式的睡眠. 不可能啊!之前我用S3C2440的CPU(内核版本Linux_2_6_

Linux Suspend过程【转】

转自:http://blog.csdn.net/chen198746/article/details/15809363 目录(?)[-] Linux Suspend简介 Suspend流程 enter_statePM_SUSPEND_MEM 31 准备并冻结进程suspend_prepare 32 Suspend外部设备suspend_devices_and_enter 21 suspend_console 22 dpm_suspend_start  PMSG_SUSPEND 23 suspen