sched: reduce balance-tasks overhead

At the moment, balance_tasks() provides low level functionality for both
  move_tasks() and move_one_task() (indirectly) via the load_balance()
function (in the sched_class interface) which also provides dual
functionality.  This dual functionality complicates the interfaces and
internal mechanisms and makes the run time overhead of operations that
are called with two run queue locks held.

This patch addresses this issue and reduces the overhead of these
operations.

Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Peter Williams 2007-10-24 18:23:51 +02:00 committed by Ingo Molnar
parent a0f846aa76
commit e1d1484f72
5 changed files with 135 additions and 57 deletions

View file

@ -838,11 +838,35 @@ struct rq_iterator {
struct task_struct *(*next)(void *);
};
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
int *this_best_prio, struct rq_iterator *iterator);
#ifdef CONFIG_SMP
static unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move, struct sched_domain *sd,
enum cpu_idle_type idle, int *all_pinned,
int *this_best_prio, struct rq_iterator *iterator);
static int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle,
struct rq_iterator *iterator);
#else
static inline unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move, struct sched_domain *sd,
enum cpu_idle_type idle, int *all_pinned,
int *this_best_prio, struct rq_iterator *iterator)
{
return 0;
}
static inline int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle,
struct rq_iterator *iterator)
{
return 0;
}
#endif
#include "sched_stats.h"
#include "sched_idletask.c"
@ -2224,17 +2248,17 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
return 1;
}
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
int *this_best_prio, struct rq_iterator *iterator)
static unsigned long
balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_load_move, struct sched_domain *sd,
enum cpu_idle_type idle, int *all_pinned,
int *this_best_prio, struct rq_iterator *iterator)
{
int pulled = 0, pinned = 0, skip_for_load;
struct task_struct *p;
long rem_load_move = max_load_move;
if (max_nr_move == 0 || max_load_move == 0)
if (max_load_move == 0)
goto out;
pinned = 1;
@ -2267,7 +2291,7 @@ next:
* We only want to steal up to the prescribed number of tasks
* and the prescribed amount of weighted load.
*/
if (pulled < max_nr_move && rem_load_move > 0) {
if (rem_load_move > 0) {
if (p->prio < *this_best_prio)
*this_best_prio = p->prio;
p = iterator->next(iterator->arg);
@ -2275,7 +2299,7 @@ next:
}
out:
/*
* Right now, this is the only place pull_task() is called,
* Right now, this is one of only two places pull_task() is called,
* so we can safely collect pull_task() stats here rather than
* inside pull_task().
*/
@ -2283,8 +2307,8 @@ out:
if (all_pinned)
*all_pinned = pinned;
*load_moved = max_load_move - rem_load_move;
return pulled;
return max_load_move - rem_load_move;
}
/*
@ -2306,7 +2330,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
do {
total_load_moved +=
class->load_balance(this_rq, this_cpu, busiest,
ULONG_MAX, max_load_move - total_load_moved,
max_load_move - total_load_moved,
sd, idle, all_pinned, &this_best_prio);
class = class->next;
} while (class && max_load_move > total_load_moved);
@ -2314,6 +2338,32 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
return total_load_moved > 0;
}
static int
iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle,
struct rq_iterator *iterator)
{
struct task_struct *p = iterator->start(iterator->arg);
int pinned = 0;
while (p) {
if (can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
pull_task(busiest, p, this_rq, this_cpu);
/*
* Right now, this is only the second place pull_task()
* is called, so we can safely collect pull_task()
* stats here rather than inside pull_task().
*/
schedstat_inc(sd, lb_gained[idle]);
return 1;
}
p = iterator->next(iterator->arg);
}
return 0;
}
/*
* move_one_task tries to move exactly one task from busiest to this_rq, as
* part of active balancing operations within "domain".
@ -2325,12 +2375,9 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
struct sched_domain *sd, enum cpu_idle_type idle)
{
const struct sched_class *class;
int this_best_prio = MAX_PRIO;
for (class = sched_class_highest; class; class = class->next)
if (class->load_balance(this_rq, this_cpu, busiest,
1, ULONG_MAX, sd, idle, NULL,
&this_best_prio))
if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle))
return 1;
return 0;
@ -3267,18 +3314,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
{
}
/* Avoid "used but not defined" warning on UP */
static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
unsigned long max_nr_move, unsigned long max_load_move,
struct sched_domain *sd, enum cpu_idle_type idle,
int *all_pinned, unsigned long *load_moved,
int *this_best_prio, struct rq_iterator *iterator)
{
*load_moved = 0;
return 0;
}
#endif
DEFINE_PER_CPU(struct kernel_stat, kstat);