Merge branch 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup

Pull cgroup updates from Tejun Heo:
 "Cgroup2 cpu controller support is finally merged.

   - Basic cpu statistics support to allow monitoring by default without
     the CPU controller enabled.

   - cgroup2 cpu controller support.

 - /sys/kernel/cgroup files to help deal with new / optional
     features"

* 'for-4.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: export list of cgroups v2 features using sysfs
  cgroup: export list of delegatable control files using sysfs
  cgroup: mark @cgrp __maybe_unused in cpu_stat_show()
  MAINTAINERS: relocate cpuset.c
  cgroup, sched: Move basic cpu stats from cgroup.stat to cpu.stat
  sched: Implement interface for cgroup unified hierarchy
  sched: Misc preps for cgroup unified hierarchy interface
  sched/cputime: Add dummy cputime_adjust() implementation for CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
  cgroup: statically initialize init_css_set->dfl_cgrp
  cgroup: Implement cgroup2 basic CPU usage accounting
  cpuacct: Introduce cgroup_account_cputime[_field]()
  sched/cputime: Expose cputime_adjust()
commit 22714a2ba4
Author: Linus Torvalds
Date:   2017-11-15 14:29:44 -08:00

 17 files changed, 821 insertions(+), 61 deletions(-)

diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
@@ -17,6 +17,7 @@
 #include <linux/refcount.h>
 #include <linux/percpu-refcount.h>
 #include <linux/percpu-rwsem.h>
+#include <linux/u64_stats_sync.h>
 #include <linux/workqueue.h>
 #include <linux/bpf-cgroup.h>
 
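The new include pulls in the u64_stats seqcount helpers which protect the hot-path counters introduced in the next hunk. The u64_stats_* calls below are the real <linux/u64_stats_sync.h> API; the two wrapper functions are only an illustrative sketch of the intended usage, not code from this series:

#include <linux/cgroup-defs.h>
#include <linux/u64_stats_sync.h>

/* writer side (hot path): free on 64-bit, a seqcount bump on 32-bit */
static void account_exec(struct cgroup_cpu_stat *cstat, u64 delta_exec)
{
	u64_stats_update_begin(&cstat->sync);
	cstat->cputime.sum_exec_runtime += delta_exec;
	u64_stats_update_end(&cstat->sync);
}

/* reader side: retry the snapshot if a writer raced with us */
static struct task_cputime fetch_cputime(struct cgroup_cpu_stat *cstat)
{
	struct task_cputime cputime;
	unsigned int seq;

	do {
		seq = u64_stats_fetch_begin(&cstat->sync);
		cputime = cstat->cputime;
	} while (u64_stats_fetch_retry(&cstat->sync, seq));

	return cputime;
}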
@@ -255,6 +256,57 @@ struct css_set {
 	struct rcu_head rcu_head;
 };
 
+/*
+ * cgroup basic resource usage statistics.  Accounting is done per-cpu in
+ * cgroup_cpu_stat which is then lazily propagated up the hierarchy on
+ * reads.
+ *
+ * When a stat gets updated, the cgroup_cpu_stat and its ancestors are
+ * linked into the updated tree.  On the following read, propagation only
+ * considers and consumes the updated tree.  This makes reading O(the
+ * number of descendants which have been active since the last read)
+ * instead of O(the total number of descendants).
+ *
+ * This is important because there can be a lot of (draining) cgroups which
+ * aren't active and stat may be read frequently.  The combination can
+ * become very expensive.  By propagating selectively, increasing reading
+ * frequency decreases the cost of each read.
+ */
+struct cgroup_cpu_stat {
+	/*
+	 * ->sync protects all the current counters.  These are the only
+	 * fields which get updated in the hot path.
+	 */
+	struct u64_stats_sync sync;
+	struct task_cputime cputime;
+
+	/*
+	 * Snapshots at the last reading.  These are used to calculate the
+	 * deltas to propagate to the global counters.
+	 */
+	struct task_cputime last_cputime;
+
+	/*
+	 * Child cgroups with stat updates on this cpu since the last read
+	 * are linked on the parent's ->updated_children through
+	 * ->updated_next.
+	 *
+	 * In addition to being more compact, a singly-linked list pointing
+	 * to the cgroup makes it unnecessary for each per-cpu struct to
+	 * point back to the associated cgroup.
+	 *
+	 * Protected by per-cpu cgroup_cpu_stat_lock.
+	 */
+	struct cgroup *updated_children;	/* terminated by self cgroup */
+	struct cgroup *updated_next;		/* NULL iff not on the list */
+};
+
+struct cgroup_stat {
+	/* per-cpu statistics are collected into the following global counters */
+	struct task_cputime cputime;
+	struct prev_cputime prev_cputime;
+};
+
 struct cgroup {
 	/* self css with NULL ->ss, points back to this cgroup */
 	struct cgroup_subsys_state self;
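The "updated tree" bookkeeping above is easier to see in miniature. The following is a self-contained userspace toy model of the linking and consuming steps, not the kernel implementation (which this series adds in kernel/cgroup/stat.c); every name in it is invented for illustration:

#include <stdio.h>
#include <stddef.h>

struct node {
	struct node *parent;
	struct node *updated_children;	/* terminated by self, as above */
	struct node *updated_next;	/* NULL iff not on a parent's list */
	const char *name;
};

#define NODE(n, p) struct node n = { .parent = p, .updated_children = &n, .name = #n }

/* Update path: link @n and any not-yet-linked ancestors into the tree. */
static void mark_updated(struct node *n)
{
	for (; n->parent; n = n->parent) {
		if (n->updated_next)	/* already linked, so are its ancestors */
			return;
		n->updated_next = n->parent->updated_children;
		n->parent->updated_children = n;
	}
}

/* Read path: visit and unlink only the subtrees which saw updates. */
static void consume_updated(struct node *n)
{
	struct node *child = n->updated_children;

	while (child != n) {		/* the self pointer terminates the list */
		struct node *next = child->updated_next;

		consume_updated(child);
		printf("flush %s into %s\n", child->name, n->name);
		child->updated_next = NULL;
		child = next;
	}
	n->updated_children = n;
}

int main(void)
{
	NODE(root, NULL);
	NODE(a, &root);
	NODE(b, &root);
	NODE(a1, &a);

	(void)b;			/* b never updates, so reads skip it */
	mark_updated(&a1);		/* links a1 onto a, and a onto root */
	consume_updated(&root);		/* visits root, a, a1 only: O(active) */
	return 0;
}

Marking b's subtree would cost one more link; leaving it idle costs nothing on reads, which is exactly the O(active descendants) property the comment block claims.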
@@ -354,6 +406,11 @@ struct cgroup {
 	 */
 	struct cgroup *dom_cgrp;
 
+	/* cgroup basic resource statistics */
+	struct cgroup_cpu_stat __percpu *cpu_stat;
+	struct cgroup_stat pending_stat;	/* pending from children */
+	struct cgroup_stat stat;
+
 	/*
 	 * list of pidlists, up to two for each namespace (one for procs, one
 	 * for tasks); created on demand.
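cpu_stat, pending_stat and stat form a three-level funnel: per-cpu deltas, plus counters already pushed up by flushed children, are folded into the cgroup's own global counters and forwarded to the parent. Below is a minimal runnable model of one flush step, assuming counters only ever grow; the names and the exact flow are illustrative, not the kernel's:

#include <stdio.h>

struct cpu_counters { unsigned long long sum_exec; };

struct grp {
	struct grp *parent;
	struct cpu_counters percpu;	/* stands in for one cpu's *cpu_stat */
	struct cpu_counters last;	/* snapshot taken at the previous flush */
	unsigned long long pending;	/* stands in for pending_stat */
	unsigned long long total;	/* stands in for stat */
};

/* Fold own per-cpu delta plus children's pending into the global
 * counter, and forward the same amount to the parent. */
static void flush_one(struct grp *g)
{
	unsigned long long delta = g->percpu.sum_exec - g->last.sum_exec;

	g->last = g->percpu;		/* baseline for the next delta */
	g->total += g->pending + delta;
	if (g->parent)
		g->parent->pending += g->pending + delta;
	g->pending = 0;
}

int main(void)
{
	struct grp root = { 0 };
	struct grp child = { .parent = &root };

	child.percpu.sum_exec = 100;	/* child ran 100 units on this cpu */
	flush_one(&child);		/* children flush first (post-order) */
	flush_one(&root);		/* root absorbs the pending 100 */
	printf("child=%llu root=%llu\n", child.total, root.total);
	return 0;
}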
@@ -513,6 +570,8 @@ struct cgroup_subsys {
 	void (*css_released)(struct cgroup_subsys_state *css);
 	void (*css_free)(struct cgroup_subsys_state *css);
 	void (*css_reset)(struct cgroup_subsys_state *css);
+	int (*css_extra_stat_show)(struct seq_file *seq,
+				   struct cgroup_subsys_state *css);
 	int (*can_attach)(struct cgroup_taskset *tset);
 	void (*cancel_attach)(struct cgroup_taskset *tset);
 	void (*attach)(struct cgroup_taskset *tset);
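css_extra_stat_show() is how a controller appends its own key/value lines to cgroup2's cpu.stat after the basic usage_usec/user_usec/system_usec keys; in this series the cpu controller uses it for its bandwidth statistics. A kernel-style sketch of a hypothetical "foo" controller hooking it (the controller, callback and counter are all invented for illustration):

#include <linux/cgroup.h>
#include <linux/seq_file.h>

/* Emit extra key/value pairs after the basic cpu usage lines. */
static int foo_css_extra_stat_show(struct seq_file *seq,
				   struct cgroup_subsys_state *css)
{
	/* a real controller would derive the value from its css state */
	seq_printf(seq, "foo_events %llu\n", 0ULL);
	return 0;
}

struct cgroup_subsys foo_cgrp_subsys = {
	/* ... css_alloc, css_free and friends elided ... */
	.css_extra_stat_show	= foo_css_extra_stat_show,
};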