mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-06-20 21:51:05 +00:00
[PATCH] per-task-delay-accounting: delay accounting usage of taskstats interface
Usage of taskstats interface by delay accounting. Signed-off-by: Shailabh Nagar <nagar@us.ibm.com> Signed-off-by: Balbir Singh <balbir@in.ibm.com> Cc: Jes Sorensen <jes@sgi.com> Cc: Peter Chubb <peterc@gelato.unsw.edu.au> Cc: Erich Focht <efocht@ess.nec.de> Cc: Levent Serinol <lserinol@gmail.com> Cc: Jay Lan <jlan@engr.sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
This commit is contained in:
parent
c757249af1
commit
6f44993fe1
7 changed files with 144 additions and 7 deletions
|
@ -18,6 +18,7 @@
|
||||||
#define _LINUX_DELAYACCT_H
|
#define _LINUX_DELAYACCT_H
|
||||||
|
|
||||||
#include <linux/sched.h>
|
#include <linux/sched.h>
|
||||||
|
#include <linux/taskstats_kern.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Per-task flags relevant to delay accounting
|
* Per-task flags relevant to delay accounting
|
||||||
|
@ -35,6 +36,7 @@ extern void __delayacct_tsk_init(struct task_struct *);
|
||||||
extern void __delayacct_tsk_exit(struct task_struct *);
|
extern void __delayacct_tsk_exit(struct task_struct *);
|
||||||
extern void __delayacct_blkio_start(void);
|
extern void __delayacct_blkio_start(void);
|
||||||
extern void __delayacct_blkio_end(void);
|
extern void __delayacct_blkio_end(void);
|
||||||
|
extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *);
|
||||||
|
|
||||||
static inline void delayacct_set_flag(int flag)
|
static inline void delayacct_set_flag(int flag)
|
||||||
{
|
{
|
||||||
|
@ -74,6 +76,16 @@ static inline void delayacct_blkio_end(void)
|
||||||
__delayacct_blkio_end();
|
__delayacct_blkio_end();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int delayacct_add_tsk(struct taskstats *d,
|
||||||
|
struct task_struct *tsk)
|
||||||
|
{
|
||||||
|
if (likely(!delayacct_on))
|
||||||
|
return -EINVAL;
|
||||||
|
if (!tsk->delays)
|
||||||
|
return 0;
|
||||||
|
return __delayacct_add_tsk(d, tsk);
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
static inline void delayacct_set_flag(int flag)
|
static inline void delayacct_set_flag(int flag)
|
||||||
{}
|
{}
|
||||||
|
@ -89,6 +101,9 @@ static inline void delayacct_blkio_start(void)
|
||||||
{}
|
{}
|
||||||
static inline void delayacct_blkio_end(void)
|
static inline void delayacct_blkio_end(void)
|
||||||
{}
|
{}
|
||||||
|
static inline int delayacct_add_tsk(struct taskstats *d,
|
||||||
|
struct task_struct *tsk)
|
||||||
|
{ return 0; }
|
||||||
#endif /* CONFIG_TASK_DELAY_ACCT */
|
#endif /* CONFIG_TASK_DELAY_ACCT */
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -990,6 +990,7 @@ struct task_struct {
|
||||||
*/
|
*/
|
||||||
struct pipe_inode_info *splice_pipe;
|
struct pipe_inode_info *splice_pipe;
|
||||||
#ifdef CONFIG_TASK_DELAY_ACCT
|
#ifdef CONFIG_TASK_DELAY_ACCT
|
||||||
|
spinlock_t delays_lock;
|
||||||
struct task_delay_info *delays;
|
struct task_delay_info *delays;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
|
@ -34,7 +34,60 @@
|
||||||
struct taskstats {
|
struct taskstats {
|
||||||
|
|
||||||
/* Version 1 */
|
/* Version 1 */
|
||||||
__u64 version;
|
__u16 version;
|
||||||
|
__u16 padding[3]; /* Userspace should not interpret the padding
|
||||||
|
* field which can be replaced by useful
|
||||||
|
* fields if struct taskstats is extended.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Delay accounting fields start
|
||||||
|
*
|
||||||
|
* All values, until comment "Delay accounting fields end" are
|
||||||
|
* available only if delay accounting is enabled, even though the last
|
||||||
|
* few fields are not delays
|
||||||
|
*
|
||||||
|
* xxx_count is the number of delay values recorded
|
||||||
|
* xxx_delay_total is the corresponding cumulative delay in nanoseconds
|
||||||
|
*
|
||||||
|
* xxx_delay_total wraps around to zero on overflow
|
||||||
|
* xxx_count incremented regardless of overflow
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Delay waiting for cpu, while runnable
|
||||||
|
* count, delay_total NOT updated atomically
|
||||||
|
*/
|
||||||
|
__u64 cpu_count;
|
||||||
|
__u64 cpu_delay_total;
|
||||||
|
|
||||||
|
/* Following four fields atomically updated using task->delays->lock */
|
||||||
|
|
||||||
|
/* Delay waiting for synchronous block I/O to complete
|
||||||
|
* does not account for delays in I/O submission
|
||||||
|
*/
|
||||||
|
__u64 blkio_count;
|
||||||
|
__u64 blkio_delay_total;
|
||||||
|
|
||||||
|
/* Delay waiting for page fault I/O (swap in only) */
|
||||||
|
__u64 swapin_count;
|
||||||
|
__u64 swapin_delay_total;
|
||||||
|
|
||||||
|
/* cpu "wall-clock" running time
|
||||||
|
* On some architectures, value will adjust for cpu time stolen
|
||||||
|
* from the kernel in involuntary waits due to virtualization.
|
||||||
|
* Value is cumulative, in nanoseconds, without a corresponding count
|
||||||
|
* and wraps around to zero silently on overflow
|
||||||
|
*/
|
||||||
|
__u64 cpu_run_real_total;
|
||||||
|
|
||||||
|
/* cpu "virtual" running time
|
||||||
|
* Uses time intervals seen by the kernel i.e. no adjustment
|
||||||
|
* for kernel's involuntary waits due to virtualization.
|
||||||
|
* Value is cumulative, in nanoseconds, without a corresponding count
|
||||||
|
* and wraps around to zero silently on overflow
|
||||||
|
*/
|
||||||
|
__u64 cpu_run_virtual_total;
|
||||||
|
/* Delay accounting fields end */
|
||||||
|
/* version 1 ends here */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@ enum {
|
||||||
|
|
||||||
#ifdef CONFIG_TASKSTATS
|
#ifdef CONFIG_TASKSTATS
|
||||||
extern kmem_cache_t *taskstats_cache;
|
extern kmem_cache_t *taskstats_cache;
|
||||||
|
extern struct mutex taskstats_exit_mutex;
|
||||||
|
|
||||||
static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
|
static inline void taskstats_exit_alloc(struct taskstats **ptidstats,
|
||||||
struct taskstats **ptgidstats)
|
struct taskstats **ptgidstats)
|
||||||
|
|
|
@ -173,6 +173,7 @@ config TASKSTATS
|
||||||
|
|
||||||
config TASK_DELAY_ACCT
|
config TASK_DELAY_ACCT
|
||||||
bool "Enable per-task delay accounting (EXPERIMENTAL)"
|
bool "Enable per-task delay accounting (EXPERIMENTAL)"
|
||||||
|
depends on TASKSTATS
|
||||||
help
|
help
|
||||||
Collect information on time spent by a task waiting for system
|
Collect information on time spent by a task waiting for system
|
||||||
resources like cpu, synchronous block I/O completion and swapping
|
resources like cpu, synchronous block I/O completion and swapping
|
||||||
|
|
|
@ -41,6 +41,10 @@ void delayacct_init(void)
|
||||||
|
|
||||||
void __delayacct_tsk_init(struct task_struct *tsk)
|
void __delayacct_tsk_init(struct task_struct *tsk)
|
||||||
{
|
{
|
||||||
|
spin_lock_init(&tsk->delays_lock);
|
||||||
|
/* No need to acquire tsk->delays_lock for allocation here unless
|
||||||
|
__delayacct_tsk_init called after tsk is attached to tasklist
|
||||||
|
*/
|
||||||
tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
|
tsk->delays = kmem_cache_zalloc(delayacct_cache, SLAB_KERNEL);
|
||||||
if (tsk->delays)
|
if (tsk->delays)
|
||||||
spin_lock_init(&tsk->delays->lock);
|
spin_lock_init(&tsk->delays->lock);
|
||||||
|
@ -48,8 +52,11 @@ void __delayacct_tsk_init(struct task_struct *tsk)
|
||||||
|
|
||||||
void __delayacct_tsk_exit(struct task_struct *tsk)
|
void __delayacct_tsk_exit(struct task_struct *tsk)
|
||||||
{
|
{
|
||||||
kmem_cache_free(delayacct_cache, tsk->delays);
|
struct task_delay_info *delays = tsk->delays;
|
||||||
|
spin_lock(&tsk->delays_lock);
|
||||||
tsk->delays = NULL;
|
tsk->delays = NULL;
|
||||||
|
spin_unlock(&tsk->delays_lock);
|
||||||
|
kmem_cache_free(delayacct_cache, delays);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -104,3 +111,56 @@ void __delayacct_blkio_end(void)
|
||||||
&current->delays->blkio_delay,
|
&current->delays->blkio_delay,
|
||||||
&current->delays->blkio_count);
|
&current->delays->blkio_count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk)
|
||||||
|
{
|
||||||
|
s64 tmp;
|
||||||
|
struct timespec ts;
|
||||||
|
unsigned long t1,t2,t3;
|
||||||
|
|
||||||
|
spin_lock(&tsk->delays_lock);
|
||||||
|
|
||||||
|
/* Though tsk->delays accessed later, early exit avoids
|
||||||
|
* unnecessary returning of other data
|
||||||
|
*/
|
||||||
|
if (!tsk->delays)
|
||||||
|
goto done;
|
||||||
|
|
||||||
|
tmp = (s64)d->cpu_run_real_total;
|
||||||
|
cputime_to_timespec(tsk->utime + tsk->stime, &ts);
|
||||||
|
tmp += timespec_to_ns(&ts);
|
||||||
|
d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* No locking available for sched_info (and too expensive to add one)
|
||||||
|
* Mitigate by taking snapshot of values
|
||||||
|
*/
|
||||||
|
t1 = tsk->sched_info.pcnt;
|
||||||
|
t2 = tsk->sched_info.run_delay;
|
||||||
|
t3 = tsk->sched_info.cpu_time;
|
||||||
|
|
||||||
|
d->cpu_count += t1;
|
||||||
|
|
||||||
|
jiffies_to_timespec(t2, &ts);
|
||||||
|
tmp = (s64)d->cpu_delay_total + timespec_to_ns(&ts);
|
||||||
|
d->cpu_delay_total = (tmp < (s64)d->cpu_delay_total) ? 0 : tmp;
|
||||||
|
|
||||||
|
tmp = (s64)d->cpu_run_virtual_total + (s64)jiffies_to_usecs(t3) * 1000;
|
||||||
|
d->cpu_run_virtual_total =
|
||||||
|
(tmp < (s64)d->cpu_run_virtual_total) ? 0 : tmp;
|
||||||
|
|
||||||
|
/* zero XXX_total, non-zero XXX_count implies XXX stat overflowed */
|
||||||
|
|
||||||
|
spin_lock(&tsk->delays->lock);
|
||||||
|
tmp = d->blkio_delay_total + tsk->delays->blkio_delay;
|
||||||
|
d->blkio_delay_total = (tmp < d->blkio_delay_total) ? 0 : tmp;
|
||||||
|
tmp = d->swapin_delay_total + tsk->delays->swapin_delay;
|
||||||
|
d->swapin_delay_total = (tmp < d->swapin_delay_total) ? 0 : tmp;
|
||||||
|
d->blkio_count += tsk->delays->blkio_count;
|
||||||
|
d->swapin_count += tsk->delays->swapin_count;
|
||||||
|
spin_unlock(&tsk->delays->lock);
|
||||||
|
|
||||||
|
done:
|
||||||
|
spin_unlock(&tsk->delays_lock);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
|
@ -18,13 +18,13 @@
|
||||||
|
|
||||||
#include <linux/kernel.h>
|
#include <linux/kernel.h>
|
||||||
#include <linux/taskstats_kern.h>
|
#include <linux/taskstats_kern.h>
|
||||||
|
#include <linux/delayacct.h>
|
||||||
#include <net/genetlink.h>
|
#include <net/genetlink.h>
|
||||||
#include <asm/atomic.h>
|
#include <asm/atomic.h>
|
||||||
|
|
||||||
static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
|
static DEFINE_PER_CPU(__u32, taskstats_seqnum) = { 0 };
|
||||||
static int family_registered;
|
static int family_registered;
|
||||||
kmem_cache_t *taskstats_cache;
|
kmem_cache_t *taskstats_cache;
|
||||||
static DEFINE_MUTEX(taskstats_exit_mutex);
|
|
||||||
|
|
||||||
static struct genl_family family = {
|
static struct genl_family family = {
|
||||||
.id = GENL_ID_GENERATE,
|
.id = GENL_ID_GENERATE,
|
||||||
|
@ -120,7 +120,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
|
||||||
* goto err;
|
* goto err;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
err:
|
rc = delayacct_add_tsk(stats, tsk);
|
||||||
|
stats->version = TASKSTATS_VERSION;
|
||||||
|
|
||||||
|
/* Define err: label here if needed */
|
||||||
put_task_struct(tsk);
|
put_task_struct(tsk);
|
||||||
return rc;
|
return rc;
|
||||||
|
|
||||||
|
@ -152,8 +155,14 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk,
|
||||||
* break;
|
* break;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
rc = delayacct_add_tsk(stats, tsk);
|
||||||
|
if (rc)
|
||||||
|
break;
|
||||||
|
|
||||||
} while_each_thread(first, tsk);
|
} while_each_thread(first, tsk);
|
||||||
read_unlock(&tasklist_lock);
|
read_unlock(&tasklist_lock);
|
||||||
|
stats->version = TASKSTATS_VERSION;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Accounting subsystems can also add calls here if they don't
|
* Accounting subsystems can also add calls here if they don't
|
||||||
|
@ -233,8 +242,6 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats,
|
||||||
if (!family_registered || !tidstats)
|
if (!family_registered || !tidstats)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
mutex_lock(&taskstats_exit_mutex);
|
|
||||||
|
|
||||||
is_thread_group = !thread_group_empty(tsk);
|
is_thread_group = !thread_group_empty(tsk);
|
||||||
rc = 0;
|
rc = 0;
|
||||||
|
|
||||||
|
@ -292,7 +299,6 @@ nla_put_failure:
|
||||||
err_skb:
|
err_skb:
|
||||||
nlmsg_free(rep_skb);
|
nlmsg_free(rep_skb);
|
||||||
ret:
|
ret:
|
||||||
mutex_unlock(&taskstats_exit_mutex);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue