This tree introduces static_call(), which is the idea of static_branch()

applied to indirect function calls. Remove a data load (indirection) by
 modifying the text.
 
 They give the flexibility of function pointers, but with better
 performance. (This is especially important for cases where
 retpolines would otherwise be used, as retpolines can be pretty
 slow.)
 
 API overview:
 
   DECLARE_STATIC_CALL(name, func);
   DEFINE_STATIC_CALL(name, func);
   DEFINE_STATIC_CALL_NULL(name, typename);
 
   static_call(name)(args...);
   static_call_cond(name)(args...);
   static_call_update(name, func);
 
 x86 is supported via text patching, otherwise basic indirect calls are used,
 with function pointers.
 
 There's a second variant using inline code patching, inspired by jump-labels,
 implemented on x86 as well.
 
 The new APIs are utilized in the x86 perf code, a heavy user of function pointers,
 where static calls speed up the PMU handler by 4.2% (!).
 
 The generic implementation is not really excercised on other architectures,
 outside of the trivial test_static_call_init() self-test.
 
 Signed-off-by: Ingo Molnar <mingo@kernel.org>
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCgAvFiEEBpT5eoXrXCwVQwEKEnMQ0APhK1gFAl+EfAQRHG1pbmdvQGtl
 cm5lbC5vcmcACgkQEnMQ0APhK1iEAw//divHeVCJnHhV+YBbuI9ROUsERkzu8VhK
 O1DEmW68Fvj7pszT8NZsMjtkt97ZtxDRK7aCJiiup0eItG9qCJ8lpCLb84ZbizHV
 HhCbhBLrpxSvTrWlQnkgP1OkPAbtoryIjVlZzWhjye2MY8UEbVnZWyviBolbAAxH
 Fk1Yi56fIMu19GO+9Ohzy9E2VDnVEH1iMx5YWoLD2H88Qbq/yEMP+U2tIj8hIVKT
 Y/jdogihNXRIau6QB+YPfDPisdty+RHxfU7zct4Rv8cFF5ylglZB5fD34C3sUQF2
 WqsaYz7zjUj9f02F8pw8hIaAT7InzArPhlNVITxf2oMfmdrNqBptnSCddZqCJLvv
 oDGew21k50Zcbqkv9amclpxXH5tTpRvJeqit2pz/85GMeqBRuhzHUAkCpht5YA73
 qJsHWS3z+qIxKi0tDbhDJswuwa51q5sgdUUwo1uCr3wT3DGDlqNhCAZBzX14dcty
 0shDSbv13TCwqAcb7asPzEoPwE15cwa+x+viGEIL901pyZKyQYjs/abDU26It3BW
 roWRkuVJZ9/QMdZJs1v7kaXw1L8YiKIDkBgke+xbfrDwEvvjudQkl2LUL66DB11j
 RJU3GyxKClvdY06SSRh/H13fqZLNKh1JZ0nPEWSTJECDFN9zcDjrDrod/7PFOcpY
 NAlawLoGG+s=
 =JvpF
 -----END PGP SIGNATURE-----

Merge tag 'core-static_call-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull static call support from Ingo Molnar:
 "This introduces static_call(), which is the idea of static_branch()
  applied to indirect function calls. Remove a data load (indirection)
  by modifying the text.

  They give the flexibility of function pointers, but with better
  performance. (This is especially important for cases where retpolines
  would otherwise be used, as retpolines can be pretty slow.)

  API overview:

      DECLARE_STATIC_CALL(name, func);
      DEFINE_STATIC_CALL(name, func);
      DEFINE_STATIC_CALL_NULL(name, typename);

      static_call(name)(args...);
      static_call_cond(name)(args...);
      static_call_update(name, func);

  x86 is supported via text patching, otherwise basic indirect calls are
  used, with function pointers.

  There's a second variant using inline code patching, inspired by
  jump-labels, implemented on x86 as well.

  The new APIs are utilized in the x86 perf code, a heavy user of
  function pointers, where static calls speed up the PMU handler by
  4.2% (!).

  The generic implementation is not really excercised on other
  architectures, outside of the trivial test_static_call_init()
  self-test"

* tag 'core-static_call-2020-10-12' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (21 commits)
  static_call: Fix return type of static_call_init
  tracepoint: Fix out of sync data passing by static caller
  tracepoint: Fix overly long tracepoint names
  x86/perf, static_call: Optimize x86_pmu methods
  tracepoint: Optimize using static_call()
  static_call: Allow early init
  static_call: Add some validation
  static_call: Handle tail-calls
  static_call: Add static_call_cond()
  x86/alternatives: Teach text_poke_bp() to emulate RET
  static_call: Add simple self-test for static calls
  x86/static_call: Add inline static call implementation for x86-64
  x86/static_call: Add out-of-line static call implementation
  static_call: Avoid kprobes on inline static_call()s
  static_call: Add inline static call infrastructure
  static_call: Add basic static call infrastructure
  compiler.h: Make __ADDRESSABLE() symbol truly unique
  jump_label,module: Fix module lifetime for __jump_label_mod_text_reserved()
  module: Properly propagate MODULE_STATE_COMING failure
  module: Fix up module_notifier return values
  ...
This commit is contained in:
Linus Torvalds 2020-10-12 13:58:15 -07:00
commit dd502a8107
47 changed files with 1585 additions and 241 deletions

View file

@ -16,6 +16,7 @@
#include <linux/hashtable.h>
#include <linux/kernel.h>
#include <linux/static_call_types.h>
#define FAKE_JUMP_OFFSET -1
@ -433,6 +434,103 @@ reachable:
return 0;
}
static int create_static_call_sections(struct objtool_file *file)
{
struct section *sec, *reloc_sec;
struct reloc *reloc;
struct static_call_site *site;
struct instruction *insn;
struct symbol *key_sym;
char *key_name, *tmp;
int idx;
sec = find_section_by_name(file->elf, ".static_call_sites");
if (sec) {
INIT_LIST_HEAD(&file->static_call_list);
WARN("file already has .static_call_sites section, skipping");
return 0;
}
if (list_empty(&file->static_call_list))
return 0;
idx = 0;
list_for_each_entry(insn, &file->static_call_list, static_call_node)
idx++;
sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
sizeof(struct static_call_site), idx);
if (!sec)
return -1;
reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
if (!reloc_sec)
return -1;
idx = 0;
list_for_each_entry(insn, &file->static_call_list, static_call_node) {
site = (struct static_call_site *)sec->data->d_buf + idx;
memset(site, 0, sizeof(struct static_call_site));
/* populate reloc for 'addr' */
reloc = malloc(sizeof(*reloc));
if (!reloc) {
perror("malloc");
return -1;
}
memset(reloc, 0, sizeof(*reloc));
reloc->sym = insn->sec->sym;
reloc->addend = insn->offset;
reloc->type = R_X86_64_PC32;
reloc->offset = idx * sizeof(struct static_call_site);
reloc->sec = reloc_sec;
elf_add_reloc(file->elf, reloc);
/* find key symbol */
key_name = strdup(insn->call_dest->name);
if (!key_name) {
perror("strdup");
return -1;
}
if (strncmp(key_name, STATIC_CALL_TRAMP_PREFIX_STR,
STATIC_CALL_TRAMP_PREFIX_LEN)) {
WARN("static_call: trampoline name malformed: %s", key_name);
return -1;
}
tmp = key_name + STATIC_CALL_TRAMP_PREFIX_LEN - STATIC_CALL_KEY_PREFIX_LEN;
memcpy(tmp, STATIC_CALL_KEY_PREFIX_STR, STATIC_CALL_KEY_PREFIX_LEN);
key_sym = find_symbol_by_name(file->elf, tmp);
if (!key_sym) {
WARN("static_call: can't find static_call_key symbol: %s", tmp);
return -1;
}
free(key_name);
/* populate reloc for 'key' */
reloc = malloc(sizeof(*reloc));
if (!reloc) {
perror("malloc");
return -1;
}
memset(reloc, 0, sizeof(*reloc));
reloc->sym = key_sym;
reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
reloc->type = R_X86_64_PC32;
reloc->offset = idx * sizeof(struct static_call_site) + 4;
reloc->sec = reloc_sec;
elf_add_reloc(file->elf, reloc);
idx++;
}
if (elf_rebuild_reloc_section(file->elf, reloc_sec))
return -1;
return 0;
}
/*
* Warnings shouldn't be reported for ignored functions.
*/
@ -705,6 +803,10 @@ static int add_jump_destinations(struct objtool_file *file)
} else {
/* external sibling call */
insn->call_dest = reloc->sym;
if (insn->call_dest->static_call_tramp) {
list_add_tail(&insn->static_call_node,
&file->static_call_list);
}
continue;
}
@ -756,6 +858,10 @@ static int add_jump_destinations(struct objtool_file *file)
/* internal sibling call */
insn->call_dest = insn->jump_dest->func;
if (insn->call_dest->static_call_tramp) {
list_add_tail(&insn->static_call_node,
&file->static_call_list);
}
}
}
}
@ -1578,6 +1684,23 @@ static int read_intra_function_calls(struct objtool_file *file)
return 0;
}
static int read_static_call_tramps(struct objtool_file *file)
{
struct section *sec;
struct symbol *func;
for_each_sec(file, sec) {
list_for_each_entry(func, &sec->symbol_list, list) {
if (func->bind == STB_GLOBAL &&
!strncmp(func->name, STATIC_CALL_TRAMP_PREFIX_STR,
strlen(STATIC_CALL_TRAMP_PREFIX_STR)))
func->static_call_tramp = true;
}
}
return 0;
}
static void mark_rodata(struct objtool_file *file)
{
struct section *sec;
@ -1625,6 +1748,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
ret = read_static_call_tramps(file);
if (ret)
return ret;
ret = add_jump_destinations(file);
if (ret)
return ret;
@ -2488,6 +2615,11 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (dead_end_function(file, insn->call_dest))
return 0;
if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
list_add_tail(&insn->static_call_node,
&file->static_call_list);
}
break;
case INSN_JUMP_CONDITIONAL:
@ -2847,6 +2979,7 @@ int check(const char *_objname, bool orc)
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.static_call_list);
file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
file.ignore_unreachables = no_unreachable;
file.hints = false;
@ -2894,6 +3027,11 @@ int check(const char *_objname, bool orc)
warnings += ret;
}
ret = create_static_call_sections(&file);
if (ret < 0)
goto out;
warnings += ret;
if (orc) {
ret = create_orc(&file);
if (ret < 0)

View file

@ -22,6 +22,7 @@ struct insn_state {
struct instruction {
struct list_head list;
struct hlist_node hash;
struct list_head static_call_node;
struct section *sec;
unsigned long offset;
unsigned int len;

View file

@ -652,7 +652,7 @@ err:
}
struct section *elf_create_section(struct elf *elf, const char *name,
size_t entsize, int nr)
unsigned int sh_flags, size_t entsize, int nr)
{
struct section *sec, *shstrtab;
size_t size = entsize * nr;
@ -712,7 +712,7 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->sh.sh_entsize = entsize;
sec->sh.sh_type = SHT_PROGBITS;
sec->sh.sh_addralign = 1;
sec->sh.sh_flags = SHF_ALLOC;
sec->sh.sh_flags = SHF_ALLOC | sh_flags;
/* Add section name to .shstrtab (or .strtab for Clang) */
@ -767,7 +767,7 @@ static struct section *elf_create_rel_reloc_section(struct elf *elf, struct sect
strcpy(relocname, ".rel");
strcat(relocname, base->name);
sec = elf_create_section(elf, relocname, sizeof(GElf_Rel), 0);
sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rel), 0);
free(relocname);
if (!sec)
return NULL;
@ -797,7 +797,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
strcpy(relocname, ".rela");
strcat(relocname, base->name);
sec = elf_create_section(elf, relocname, sizeof(GElf_Rela), 0);
sec = elf_create_section(elf, relocname, 0, sizeof(GElf_Rela), 0);
free(relocname);
if (!sec)
return NULL;

View file

@ -56,6 +56,7 @@ struct symbol {
unsigned int len;
struct symbol *pfunc, *cfunc, *alias;
bool uaccess_safe;
bool static_call_tramp;
};
struct reloc {
@ -120,7 +121,7 @@ static inline u32 reloc_hash(struct reloc *reloc)
}
struct elf *elf_open_read(const char *name, int flags);
struct section *elf_create_section(struct elf *elf, const char *name, size_t entsize, int nr);
struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
void elf_add_reloc(struct elf *elf, struct reloc *reloc);
int elf_write_insn(struct elf *elf, struct section *sec,

View file

@ -16,6 +16,7 @@ struct objtool_file {
struct elf *elf;
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
struct list_head static_call_list;
bool ignore_unreachables, c_file, hints, rodata;
};

View file

@ -177,7 +177,7 @@ int create_orc_sections(struct objtool_file *file)
/* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
sec = elf_create_section(file->elf, ".orc_unwind_ip", sizeof(int), idx);
sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
if (!sec)
return -1;
@ -186,7 +186,7 @@ int create_orc_sections(struct objtool_file *file)
return -1;
/* create .orc_unwind section */
u_sec = elf_create_section(file->elf, ".orc_unwind",
u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
sizeof(struct orc_entry), idx);
/* populate sections */

View file

@ -7,6 +7,7 @@ arch/x86/include/asm/orc_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
include/linux/static_call_types.h
'
check_2 () {