1 优先级的内核表示
内核使用 0 - 139 表示内部优先级,值越低,优先级越高。其中 0 - 99 供实时进程使用;
普通进程的 nice 值 [-20, 19] 映射到范围 100 - 139,如下图
内核定义了一系列宏来辅助优先级之间的转换
sched.h
/*
 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
 * values are inverted: lower p->prio value means higher priority.
 *
 * The MAX_USER_RT_PRIO value allows the actual maximum
 * RT priority to be separate from the value exported to
 * user-space. This allows kernel threads to set their
 * priority to a value higher than any user task. Note:
 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
 */

/* Number of real-time priority levels exported to user space. */
#define MAX_USER_RT_PRIO	100
/* Number of real-time levels used internally; RT priorities are 0..MAX_RT_PRIO-1. */
#define MAX_RT_PRIO		MAX_USER_RT_PRIO

/* Total number of priority levels: 40 non-RT levels follow the RT range. */
#define MAX_PRIO		(MAX_RT_PRIO + 40)
/* Middle of the non-RT range (120 with the values above). */
#define DEFAULT_PRIO		(MAX_RT_PRIO + 20)
sched.c
/*
 * Convert user-nice values [ -20 ... 0 ... 19 ]
 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
 * and back.
 */
/* nice -> static priority: -20 maps to MAX_RT_PRIO, 19 to MAX_RT_PRIO + 39. */
#define NICE_TO_PRIO(nice)	(MAX_RT_PRIO + (nice) + 20)
/* static priority -> nice; exact inverse of NICE_TO_PRIO(). */
#define PRIO_TO_NICE(prio)	((prio) - MAX_RT_PRIO - 20)
/* Nice value of task p, derived from p->static_prio. */
#define TASK_NICE(p)		PRIO_TO_NICE((p)->static_prio)
2 优先级计算
动态优先级 task_struct->prio
普通优先级 task_struct->normal_prio
静态优先级 task_struct->static_prio (计算起点,已经设置好)
sched.c
1 /*
2 * Calculate the current priority, i.e. the priority
3 * taken into account by the scheduler. This value might
4 * be boosted by RT tasks, or might be boosted by
5 * interactivity modifiers. Will be RT if the task got
6 * RT-boosted. If not then it returns p->normal_prio.
7 */
8 static int effective_prio(struct task_struct *p)
9 {
10 p->normal_prio = normal_prio(p);
11 /*
12 * If we are RT tasks or we were boosted to RT priority,
13 * keep the priority unchanged. Otherwise, update priority
14 * to the normal priority:
15 */
16 if (!rt_prio(p->prio))
17 return p->normal_prio;
18 return p->prio;
19 }
rt_prio 检测给定的优先级数值是否落在实时范围内(即是否小于 MAX_RT_PRIO);该检测只看优先级的数值,上面的 effective_prio 传入的是动态优先级 p->prio
1 static inline int rt_prio(int prio)
2 {
3 if (unlikely(prio < MAX_RT_PRIO))
4 return 1;
5 return 0;
6 }
普通优先级的计算需要区分普通进程和实时进程:普通进程由 __normal_prio 计算,直接返回静态优先级;实时进程则根据 rt_priority 计算。rt_priority 的值越大,表示实时优先级越高,而内核内部的优先级表示正好相反(值越小,优先级越高),因此内核用
MAX_RT_PRIO-1 - p->rt_priority 计算
/*
 * __normal_prio - priority of a task as given by its static priority.
 *
 * Returns p->static_prio unchanged.
 */
static inline int __normal_prio(struct task_struct *p)
{
	return p->static_prio;
}

/*
 * normal_prio - expected priority of a task, ignoring RT inheritance.
 *
 * Tasks with an RT policy have rt_priority mapped onto the inverted
 * kernel scale, so a larger rt_priority yields a smaller return value.
 * Every other task gets __normal_prio(p).
 *
 * Per the original kernel comment, the value changes upon fork, on
 * setprio syscalls, and whenever the interactivity estimator
 * recalculates.
 */
static inline int normal_prio(struct task_struct *p)
{
	/* Non-RT tasks: the static priority is the normal priority. */
	if (!task_has_rt_policy(p))
		return __normal_prio(p);

	return MAX_RT_PRIO-1 - p->rt_priority;
}
下图描述了不同类型的进程按上述方法计算得到的结果
注意以下两点:
- 新建进程用 wake_up_new_task 唤醒,或使用 nice 系统调用改变静态优先级时,使用上述方法计算动态优先级(p->prio)。
- 进程分支出子进程时,子进程的静态优先级继承自父进程,子进程的动态优先级(prio)设置为父进程的普通优先级。
3 计算负载权重
set_load_weight负责根据进程类型及静态优先级计算负载权重
sched.h
/*
 * Load weight record.
 * @weight:     the load weight itself
 * @inv_weight: companion value stored alongside weight; presumably a
 *              precomputed inverse of weight used to avoid divisions
 *              (TODO confirm against prio_to_wmult[])
 */
struct load_weight {
	unsigned long weight, inv_weight;
};
一般来说,进程的 nice 值每降低 1,大约多获得 10% 的 CPU 时间;每升高 1,则大约少获得 10%。为了实现该策略,内核将优先级转换为权重,如下:
/*
 * Nice levels are multiplicative, with a gentle 10% change for every
 * nice level changed. I.e. when a CPU-bound task goes from nice 0 to
 * nice 1, it will get ~10% less CPU time than another CPU-bound task
 * that remained on nice 0.
 *
 * The "10% effect" is relative and cumulative: from _any_ nice level,
 * if you go up 1 level, it's -10% CPU usage, if you go down 1 level
 * it's +10% CPU usage. (to achieve that we use a multiplier of 1.25.
 * If a task goes up by ~10% and another task goes down by ~10% then
 * the relative distance between them is ~25%.)
 *
 * The table has one entry per nice level: index 0 is nice -20 and
 * index 39 is nice 19, so nice 0 (index 20) has weight 1024.
 */
static const int prio_to_weight[40] = {
 /* -20 */     88761,     71755,     56483,     46273,     36291,
 /* -15 */     29154,     23254,     18705,     14949,     11916,
 /* -10 */      9548,      7620,      6100,      4904,      3906,
 /*  -5 */      3121,      2501,      1991,      1586,      1277,
 /*   0 */      1024,       820,       655,       526,       423,
 /*   5 */       335,       272,       215,       172,       137,
 /*  10 */       110,        87,        70,        56,        45,
 /*  15 */        36,        29,        23,        18,        15,
};
具体转换代码如下:实时进程的权重是 nice 值为 -20 的普通进程权重(prio_to_weight[0])的 2 倍,SCHED_IDLE 进程的权重则很小(WEIGHT_IDLEPRIO)
/* Load weight given to SCHED_IDLE tasks. */
#define WEIGHT_IDLEPRIO		3
/*
 * inv_weight given to SCHED_IDLE tasks. Note 3 * 1431655765 == 2^32 - 1,
 * so this is presumably 2^32 / WEIGHT_IDLEPRIO; confirm against the
 * construction of prio_to_wmult[].
 */
#define WMULT_IDLEPRIO		1431655765
1 static void set_load_weight(struct task_struct *p)
2 {
3 if (task_has_rt_policy(p)) {
4 p->se.load.weight = prio_to_weight[0] * 2;
5 p->se.load.inv_weight = prio_to_wmult[0] >> 1;
6 return;
7 }
8
9 /*
10 * SCHED_IDLE tasks get minimal weight:
11 */
12 if (p->policy == SCHED_IDLE) {
13 p->se.load.weight = WEIGHT_IDLEPRIO;
14 p->se.load.inv_weight = WMULT_IDLEPRIO;
15 return;
16 }
17
18 p->se.load.weight = prio_to_weight[p->static_prio - MAX_RT_PRIO];
19 p->se.load.inv_weight = prio_to_wmult[p->static_prio - MAX_RT_PRIO];
20 }
就绪队列(进程队列)也有一个负载权重。每次进程被加入到就绪队列时,内核会调用 inc_nr_running,这样既可以确保就绪队列跟踪记录有多少进程在运行,也会把该进程的权重加到就绪队列的权重上;进程从就绪队列移除时,也会调用对应的函数
1 /*
2 * Update delta_exec, delta_fair fields for rq.
3 *
4 * delta_fair clock advances at a rate inversely proportional to
5 * total load (rq->load.weight) on the runqueue, while
6 * delta_exec advances at the same rate as wall-clock (provided
7 * cpu is not idle).
8 *
9 * delta_exec / delta_fair is a measure of the (smoothened) load on this
10 * runqueue over any given interval. This (smoothened) load is used
11 * during load balance.
12 *
13 * This function is called /before/ updating rq->load
14 * and when switching tasks.
15 */
16 static inline void inc_load(struct rq *rq, const struct task_struct *p)
17 {
18 update_load_add(&rq->load, p->se.load.weight);
19 }
20
21 static inline void dec_load(struct rq *rq, const struct task_struct *p)
22 {
23 update_load_sub(&rq->load, p->se.load.weight);
24 }
25
26 static void inc_nr_running(struct task_struct *p, struct rq *rq)
27 {
28 rq->nr_running++;
29 inc_load(rq, p);
30 }
Linux内核架构读书笔记 - 2.5.3 处理优先级,码迷,mamicode.com