mirror of
https://github.com/Fishwaldo/Star64_linux.git
synced 2025-04-23 06:44:03 +00:00
There is a small race between copy_process() and sched_fork()
in which child->sched_task_group points to already freed memory.
        parent doing fork()      | someone moving the parent
                                 | to another cgroup
  -------------------------------+-------------------------------
  copy_process()
      + dup_task_struct()<1>
                                   parent move to another cgroup,
                                   and free the old cgroup. <2>
      + sched_fork()
        + __set_task_cpu()<3>
        + task_fork_fair()
          + sched_slice()<4>
In the worst case, the sequence shown above can lead to a
use-after-free and cause a panic:
(1) the parent copies its sched_task_group to the child at <1>;
(2) someone moves the parent to another cgroup and frees the old
cgroup at <2>;
(3) the sched_task_group and cfs_rq that belong to the old cgroup
are accessed at <3> and <4>, which causes a panic:
[] BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
[] PGD 8000001fa0a86067 P4D 8000001fa0a86067 PUD 2029955067 PMD 0
[] Oops: 0000 [#1] SMP PTI
[] CPU: 7 PID: 648398 Comm: ebizzy Kdump: loaded Tainted: G OE --------- - - 4.18.0.x86_64+ #1
[] RIP: 0010:sched_slice+0x84/0xc0
[] Call Trace:
[] task_fork_fair+0x81/0x120
[] sched_fork+0x132/0x240
[] copy_process.part.5+0x675/0x20e0
[] ? __handle_mm_fault+0x63f/0x690
[] _do_fork+0xcd/0x3b0
[] do_syscall_64+0x5d/0x1d0
[] entry_SYSCALL_64_after_hwframe+0x65/0xca
[] RIP: 0033:0x7f04418cd7e1
Between cgroup_can_fork() and cgroup_post_fork(), the cgroup
membership, and thus sched_task_group, can't change. So update the
child's sched_task_group in sched_post_fork() and move task_fork() and
__set_task_cpu() (which access the sched_task_group) from sched_fork()
to sched_post_fork().
Fixes: 8323f26ce3 ("sched: Fix race in task_group")
Signed-off-by: Zhang Qiao <zhangqiao22@huawei.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Tejun Heo <tj@kernel.org>
Link: https://lkml.kernel.org/r/20210915064030.2231-1-zhangqiao22@huawei.com
177 lines
4.5 KiB
C
177 lines
4.5 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
#ifndef _LINUX_SCHED_TASK_H
|
|
#define _LINUX_SCHED_TASK_H
|
|
|
|
/*
|
|
* Interface between the scheduler and various task lifetime (fork()/exit())
|
|
* functionality:
|
|
*/
|
|
|
|
#include <linux/sched.h>
|
|
#include <linux/uaccess.h>
|
|
|
|
/* Forward declarations of types referenced by the declarations below. */
struct task_struct;
struct rusage;
union thread_union;
struct css_set;

/* All the bits taken by the old clone syscall (the low 32 bits). */
#define CLONE_LEGACY_FLAGS 0xffffffffULL
|
|
|
|
struct kernel_clone_args {
|
|
u64 flags;
|
|
int __user *pidfd;
|
|
int __user *child_tid;
|
|
int __user *parent_tid;
|
|
int exit_signal;
|
|
unsigned long stack;
|
|
unsigned long stack_size;
|
|
unsigned long tls;
|
|
pid_t *set_tid;
|
|
/* Number of elements in *set_tid */
|
|
size_t set_tid_size;
|
|
int cgroup;
|
|
int io_thread;
|
|
struct cgroup *cgrp;
|
|
struct css_set *cset;
|
|
};
|
|
|
|
/*
|
|
* This serializes "schedule()" and also protects
|
|
* the run-queue from deletions/modifications (but
|
|
* _adding_ to the beginning of the run-queue has
|
|
* a separate lock).
|
|
*/
|
|
extern rwlock_t tasklist_lock;
|
|
extern spinlock_t mmlist_lock;
|
|
|
|
extern union thread_union init_thread_union;
|
|
extern struct task_struct init_task;
|
|
|
|
extern int lockdep_tasklist_lock_is_held(void);
|
|
|
|
extern asmlinkage void schedule_tail(struct task_struct *prev);
|
|
extern void init_idle(struct task_struct *idle, int cpu);
|
|
|
|
extern int sched_fork(unsigned long clone_flags, struct task_struct *p);
|
|
extern void sched_post_fork(struct task_struct *p,
|
|
struct kernel_clone_args *kargs);
|
|
extern void sched_dead(struct task_struct *p);
|
|
|
|
/* Declared __noreturn: callers may assume control never comes back. */
void __noreturn do_task_dead(void);

extern void proc_caches_init(void);

extern void fork_init(void);

extern void release_task(struct task_struct *p);

/*
 * NOTE(review): the parameters are unnamed in this prototype, so their
 * meaning cannot be read off this header - check an implementation before
 * calling.
 */
extern int copy_thread(unsigned long, unsigned long, unsigned long,
		       struct task_struct *, unsigned long);

extern void flush_thread(void);
|
|
|
|
/*
 * exit_thread() is only implemented out of line when
 * CONFIG_HAVE_EXIT_THREAD is set.  Otherwise it is an empty inline stub, so
 * callers can invoke it unconditionally without their own #ifdef.
 */
#ifdef CONFIG_HAVE_EXIT_THREAD
extern void exit_thread(struct task_struct *tsk);
#else
static inline void exit_thread(struct task_struct *tsk)
{
}
#endif
|
|
extern void do_group_exit(int);
|
|
|
|
extern void exit_files(struct task_struct *);
|
|
extern void exit_itimers(struct signal_struct *);
|
|
|
|
extern pid_t kernel_clone(struct kernel_clone_args *kargs);
|
|
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
|
|
struct task_struct *fork_idle(int);
|
|
struct mm_struct *copy_init_mm(void);
|
|
extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
|
|
extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
|
|
int kernel_wait(pid_t pid, int *stat);
|
|
|
|
extern void free_task(struct task_struct *tsk);
|
|
|
|
/*
 * sched_exec is called by processes performing an exec.
 *
 * Without CONFIG_SMP it does nothing.  The no-op is an empty inline function
 * rather than a "{}" macro: a "{}" expansion followed by the caller's ';'
 * forms two statements, which breaks an unbraced
 * "if (x) sched_exec(); else ...".  An inline function also gives the call
 * the same void(void) signature as the SMP declaration.
 */
#ifdef CONFIG_SMP
extern void sched_exec(void);
#else
static inline void sched_exec(void)
{
}
#endif
|
|
|
|
static inline struct task_struct *get_task_struct(struct task_struct *t)
|
|
{
|
|
refcount_inc(&t->usage);
|
|
return t;
|
|
}
|
|
|
|
extern void __put_task_struct(struct task_struct *t);
|
|
|
|
static inline void put_task_struct(struct task_struct *t)
|
|
{
|
|
if (refcount_dec_and_test(&t->usage))
|
|
__put_task_struct(t);
|
|
}
|
|
|
|
static inline void put_task_struct_many(struct task_struct *t, int nr)
|
|
{
|
|
if (refcount_sub_and_test(nr, &t->usage))
|
|
__put_task_struct(t);
|
|
}
|
|
|
|
void put_task_struct_rcu_user(struct task_struct *task);
|
|
|
|
/*
 * Size of a task_struct.  With CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT it is a
 * variable defined elsewhere; otherwise it is the compile-time
 * sizeof(struct task_struct).
 */
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
extern int arch_task_struct_size __read_mostly;
#else
# define arch_task_struct_size (sizeof(struct task_struct))
#endif
|
|
|
|
#ifndef CONFIG_HAVE_ARCH_THREAD_STRUCT_WHITELIST
|
|
/*
|
|
* If an architecture has not declared a thread_struct whitelist we
|
|
* must assume something there may need to be copied to userspace.
|
|
*/
|
|
static inline void arch_thread_struct_whitelist(unsigned long *offset,
|
|
unsigned long *size)
|
|
{
|
|
*offset = 0;
|
|
/* Handle dynamically sized thread_struct. */
|
|
*size = arch_task_struct_size - offsetof(struct task_struct, thread);
|
|
}
|
|
#endif
|
|
|
|
/*
 * Return the vm_struct recorded in t->stack_vm_area when CONFIG_VMAP_STACK
 * is enabled; without it there is no such member to read and the result is
 * always NULL.
 */
#ifdef CONFIG_VMAP_STACK
static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
{
	return t->stack_vm_area;
}
#else
static inline struct vm_struct *task_stack_vm_area(const struct task_struct *t)
{
	return NULL;
}
#endif
|
|
|
|
/*
|
|
* Protects ->fs, ->files, ->mm, ->group_info, ->comm, keyring
|
|
* subscriptions and synchronises with wait4(). Also used in procfs. Also
|
|
* pins the final release of task.io_context. Also protects ->cpuset and
|
|
* ->cgroup.subsys[]. And ->vfork_done.
|
|
*
|
|
* Nests both inside and outside of read_lock(&tasklist_lock).
|
|
* It must not be nested with write_lock_irq(&tasklist_lock),
|
|
* neither inside nor outside.
|
|
*/
|
|
static inline void task_lock(struct task_struct *p)
|
|
{
|
|
spin_lock(&p->alloc_lock);
|
|
}
|
|
|
|
static inline void task_unlock(struct task_struct *p)
|
|
{
|
|
spin_unlock(&p->alloc_lock);
|
|
}
|
|
|
|
#endif /* _LINUX_SCHED_TASK_H */
|