Skip to content

Commit 10e9510

Browse files
samitolvanen authored and masahir0y committed
gendwarfksyms: Add a separate pass to resolve FQNs
Using dwarf_getscopes_die to resolve fully-qualified names turns out to be rather slow, and also results in duplicate scopes being processed, which doesn't help. Simply adding an extra pass to resolve names for all DIEs before processing exports is noticeably faster.

For the object files with the most exports in a defconfig+Rust build, the performance improvement is consistently >50%:

rust/bindings.o: 1038 exports
  before: 9.5980 +- 0.0183 seconds time elapsed ( +- 0.19% )
  after:  4.3116 +- 0.0287 seconds time elapsed ( +- 0.67% )

rust/core.o: 424 exports
  before: 5.3584 +- 0.0204 seconds time elapsed ( +- 0.38% )
  after:  0.05348 +- 0.00129 seconds time elapsed ( +- 2.42% )
          ^ Not a mistake.

net/core/dev.o: 190 exports
  before: 9.0507 +- 0.0297 seconds time elapsed ( +- 0.33% )
  after:  3.2882 +- 0.0165 seconds time elapsed ( +- 0.50% )

rust/kernel.o: 129 exports
  before: 6.8571 +- 0.0317 seconds time elapsed ( +- 0.46% )
  after:  2.9096 +- 0.0316 seconds time elapsed ( +- 1.09% )

net/core/skbuff.o: 120 exports
  before: 5.4805 +- 0.0291 seconds time elapsed ( +- 0.53% )
  after:  2.0339 +- 0.0231 seconds time elapsed ( +- 1.14% )

drivers/gpu/drm/display/drm_dp_helper.o: 101 exports
  before: 1.7877 +- 0.0187 seconds time elapsed ( +- 1.05% )
  after:  0.69245 +- 0.00994 seconds time elapsed ( +- 1.44% )

net/core/sock.o: 97 exports
  before: 5.8327 +- 0.0653 seconds time elapsed ( +- 1.12% )
  after:  2.0784 +- 0.0291 seconds time elapsed ( +- 1.40% )

drivers/net/phy/phy_device.o: 95 exports
  before: 3.0671 +- 0.0371 seconds time elapsed ( +- 1.21% )
  after:  1.2127 +- 0.0207 seconds time elapsed ( +- 1.70% )

drivers/pci/pci.o: 93 exports
  before: 1.1130 +- 0.0113 seconds time elapsed ( +- 1.01% )
  after:  0.4848 +- 0.0127 seconds time elapsed ( +- 2.63% )

kernel/sched/core.o: 83 exports
  before: 3.5092 +- 0.0223 seconds time elapsed ( +- 0.64% )
  after:  1.1231 +- 0.0145 seconds time elapsed ( +- 1.29% )

Overall, a defconfig+DWARF5 build with gendwarfksyms and Rust is 14.8% faster with this patch applied on my test system. Without Rust, there's still a 10.4% improvement in build time when gendwarfksyms is used.

Note that symbol versions are unchanged with this patch.

Suggested-by: Giuliano Procida <gprocida@google.com>
Signed-off-by: Sami Tolvanen <samitolvanen@google.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
1 parent 80e54e8 commit 10e9510

4 files changed

Lines changed: 86 additions & 72 deletions

File tree

scripts/gendwarfksyms/die.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@
66
#include <string.h>
77
#include "gendwarfksyms.h"
88

9-
#define DIE_HASH_BITS 15
9+
#define DIE_HASH_BITS 16
1010

1111
/* {die->addr, state} -> struct die * */
1212
static HASHTABLE_DEFINE(die_map, 1 << DIE_HASH_BITS);

scripts/gendwarfksyms/dwarf.c

Lines changed: 82 additions & 70 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
* Copyright (C) 2024 Google LLC
44
*/
55

6+
#define _GNU_SOURCE
67
#include <assert.h>
78
#include <inttypes.h>
89
#include <stdarg.h>
@@ -193,79 +194,17 @@ static void process_fmt(struct die *cache, const char *fmt, ...)
193194
va_end(args);
194195
}
195196

196-
#define MAX_FQN_SIZE 64
197-
198-
/* Get a fully qualified name from DWARF scopes */
199-
static char *get_fqn(Dwarf_Die *die)
197+
static void update_fqn(struct die *cache, Dwarf_Die *die)
200198
{
201-
const char *list[MAX_FQN_SIZE];
202-
Dwarf_Die *scopes = NULL;
203-
bool has_name = false;
204-
char *fqn = NULL;
205-
char *p;
206-
int count = 0;
207-
int len = 0;
208-
int res;
209-
int i;
210-
211-
res = checkp(dwarf_getscopes_die(die, &scopes));
212-
if (!res) {
213-
list[count] = get_name_attr(die);
214-
215-
if (!list[count])
216-
return NULL;
217-
218-
len += strlen(list[count]);
219-
count++;
220-
221-
goto done;
222-
}
223-
224-
for (i = res - 1; i >= 0 && count < MAX_FQN_SIZE; i--) {
225-
if (dwarf_tag(&scopes[i]) == DW_TAG_compile_unit)
226-
continue;
227-
228-
list[count] = get_name_attr(&scopes[i]);
229-
230-
if (list[count]) {
231-
has_name = true;
232-
} else {
233-
list[count] = "<anonymous>";
234-
has_name = false;
235-
}
199+
struct die *fqn;
236200

237-
len += strlen(list[count]);
238-
count++;
239-
240-
if (i > 0) {
241-
list[count++] = "::";
242-
len += 2;
243-
}
201+
if (!cache->fqn) {
202+
if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &fqn) &&
203+
*fqn->fqn)
204+
cache->fqn = xstrdup(fqn->fqn);
205+
else
206+
cache->fqn = "";
244207
}
245-
246-
free(scopes);
247-
248-
if (count == MAX_FQN_SIZE)
249-
warn("increase MAX_FQN_SIZE: reached the maximum");
250-
251-
/* Consider the DIE unnamed if the last scope doesn't have a name */
252-
if (!has_name)
253-
return NULL;
254-
done:
255-
fqn = xmalloc(len + 1);
256-
*fqn = '\0';
257-
258-
p = fqn;
259-
for (i = 0; i < count; i++)
260-
p = stpcpy(p, list[i]);
261-
262-
return fqn;
263-
}
264-
265-
static void update_fqn(struct die *cache, Dwarf_Die *die)
266-
{
267-
if (!cache->fqn)
268-
cache->fqn = get_fqn(die) ?: "";
269208
}
270209

271210
static void process_fqn(struct die *cache, Dwarf_Die *die)
@@ -1148,8 +1087,81 @@ static void process_symbol_ptr(struct symbol *sym, void *arg)
11481087
cache_free(&state.expansion_cache);
11491088
}
11501089

1090+
static int resolve_fqns(struct state *parent, struct die *unused,
1091+
Dwarf_Die *die)
1092+
{
1093+
struct state state;
1094+
struct die *cache;
1095+
const char *name;
1096+
bool use_prefix;
1097+
char *prefix = NULL;
1098+
char *fqn = "";
1099+
int tag;
1100+
1101+
if (!__die_map_get((uintptr_t)die->addr, DIE_FQN, &cache))
1102+
return 0;
1103+
1104+
tag = dwarf_tag(die);
1105+
1106+
/*
1107+
* Only namespaces and structures need to pass a prefix to the next
1108+
* scope.
1109+
*/
1110+
use_prefix = tag == DW_TAG_namespace || tag == DW_TAG_class_type ||
1111+
tag == DW_TAG_structure_type;
1112+
1113+
state.expand.current_fqn = NULL;
1114+
name = get_name_attr(die);
1115+
1116+
if (parent && parent->expand.current_fqn && (use_prefix || name)) {
1117+
/*
1118+
* The fqn for the current DIE, and if needed, a prefix for the
1119+
* next scope.
1120+
*/
1121+
if (asprintf(&prefix, "%s::%s", parent->expand.current_fqn,
1122+
name ? name : "<anonymous>") < 0)
1123+
error("asprintf failed");
1124+
1125+
if (use_prefix)
1126+
state.expand.current_fqn = prefix;
1127+
1128+
/*
1129+
* Use fqn only if the DIE has a name. Otherwise fqn will
1130+
* remain empty.
1131+
*/
1132+
if (name) {
1133+
fqn = prefix;
1134+
/* prefix will be freed by die_map. */
1135+
prefix = NULL;
1136+
}
1137+
} else if (name) {
1138+
/* No prefix from the previous scope. Use only the name. */
1139+
fqn = xstrdup(name);
1140+
1141+
if (use_prefix)
1142+
state.expand.current_fqn = fqn;
1143+
}
1144+
1145+
/* If the DIE has a non-empty name, cache it. */
1146+
if (*fqn) {
1147+
cache = die_map_get(die, DIE_FQN);
1148+
/* Move ownership of fqn to die_map. */
1149+
cache->fqn = fqn;
1150+
cache->state = DIE_FQN;
1151+
}
1152+
1153+
check(process_die_container(&state, NULL, die, resolve_fqns,
1154+
match_all));
1155+
1156+
free(prefix);
1157+
return 0;
1158+
}
1159+
11511160
void process_cu(Dwarf_Die *cudie)
11521161
{
1162+
check(process_die_container(NULL, NULL, cudie, resolve_fqns,
1163+
match_all));
1164+
11531165
check(process_die_container(NULL, NULL, cudie, process_exported_symbols,
11541166
match_all));
11551167

scripts/gendwarfksyms/gendwarfksyms.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,7 @@ void symbol_free(void);
139139

140140
enum die_state {
141141
DIE_INCOMPLETE,
142+
DIE_FQN,
142143
DIE_UNEXPANDED,
143144
DIE_COMPLETE,
144145
DIE_SYMBOL,
@@ -170,6 +171,7 @@ static inline const char *die_state_name(enum die_state state)
170171
{
171172
switch (state) {
172173
CASE_CONST_TO_STR(DIE_INCOMPLETE)
174+
CASE_CONST_TO_STR(DIE_FQN)
173175
CASE_CONST_TO_STR(DIE_UNEXPANDED)
174176
CASE_CONST_TO_STR(DIE_COMPLETE)
175177
CASE_CONST_TO_STR(DIE_SYMBOL)

scripts/gendwarfksyms/types.c

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -248,7 +248,7 @@ static char *get_type_name(struct die *cache)
248248
warn("found incomplete cache entry: %p", cache);
249249
return NULL;
250250
}
251-
if (cache->state == DIE_SYMBOL)
251+
if (cache->state == DIE_SYMBOL || cache->state == DIE_FQN)
252252
return NULL;
253253
if (!cache->fqn || !*cache->fqn)
254254
return NULL;

0 commit comments

Comments
 (0)