Skip to content

Commit 1e4624e

Browse files
committed
eventfs: Do ctx->pos update for all iterations in eventfs_iterate()
The ctx->pos was only updated when it added an entry, but the "skip to
current pos" check (c--) happened for every loop iteration regardless of
whether the entry was added or not. This inconsistency caused readdir to
be incorrect.

It was due to:

	for (i = 0; i < ei->nr_entries; i++) {

		if (c > 0) {
			c--;
			continue;
		}

		mutex_lock(&eventfs_mutex);
		/* If ei->is_freed then just bail here, nothing more to do */
		if (ei->is_freed) {
			mutex_unlock(&eventfs_mutex);
			goto out;
		}
		r = entry->callback(name, &mode, &cdata, &fops);
		mutex_unlock(&eventfs_mutex);

		[..]
		ctx->pos++;
	}

But this can cause the iterator to return a file that was already read.
That's because of the way the callback() works. Some events may not have
all files, and the callback can return 0 to tell eventfs to skip the file
for this directory.

For instance, we have:

	# ls /sys/kernel/tracing/events/ftrace/function
	format  hist  hist_debug  id  inject

and

	# ls /sys/kernel/tracing/events/sched/sched_switch/
	enable  filter  format  hist  hist_debug  id  inject  trigger

Where the function directory is missing "enable", "filter" and "trigger".
That's because the callback() for events has:

	static int event_callback(const char *name, umode_t *mode, void **data,
				  const struct file_operations **fops)
	{
		struct trace_event_file *file = *data;
		struct trace_event_call *call = file->event_call;

		[..]

		/*
		 * Only event directories that can be enabled should have
		 * triggers or filters, with the exception of the "print"
		 * event that can have a "trigger" file.
		 */
		if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
			if (call->class->reg && strcmp(name, "enable") == 0) {
				*mode = TRACE_MODE_WRITE;
				*fops = &ftrace_enable_fops;
				return 1;
			}

			if (strcmp(name, "filter") == 0) {
				*mode = TRACE_MODE_WRITE;
				*fops = &ftrace_event_filter_fops;
				return 1;
			}
		}

		if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
		    strcmp(trace_event_name(call), "print") == 0) {
			if (strcmp(name, "trigger") == 0) {
				*mode = TRACE_MODE_WRITE;
				*fops = &event_trigger_fops;
				return 1;
			}
		}

		[..]

		return 0;
	}

Where the function event has the TRACE_EVENT_FL_IGNORE_ENABLE set.

This means that the entries array elements for "enable", "filter" and
"trigger" when called on the function event will have the callback return
0 and not 1, to tell eventfs to skip these files for it.

Because the "skip to current ctx->pos" check happened for all entries, but
the ctx->pos++ only happened for entries that exist, it would confuse the
reading of a directory. Which would cause:

	# ls /sys/kernel/tracing/events/ftrace/function/
	format  hist  hist  hist_debug  hist_debug  id  inject  inject

The missing "enable", "filter" and "trigger" caused ls to show "hist",
"hist_debug" and "inject" twice.

Update the ctx->pos for every iteration to keep its update and the "skip"
update consistent. This also means that on error, the ctx->pos needs to be
decremented if it was incremented without adding something.

Link: https://lore.kernel.org/all/20240104150500.38b15a62@gandalf.local.home/
Link: https://lore.kernel.org/linux-trace-kernel/20240104220048.172295263@goodmis.org

Cc: Masami Hiramatsu <mhiramat@kernel.org>
Cc: Mark Rutland <mark.rutland@arm.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Christian Brauner <brauner@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Fixes: 493ec81 ("eventfs: Stop using dcache_readdir() for getdents()")
Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
1 parent e109dea commit 1e4624e

1 file changed

Lines changed: 14 additions & 7 deletions

File tree

fs/tracefs/event_inode.c

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -760,20 +760,21 @@ static int eventfs_iterate(struct file *file, struct dir_context *ctx)
760760
continue;
761761
}
762762

763+
ctx->pos++;
764+
763765
if (ei_child->is_freed)
764766
continue;
765767

766768
name = ei_child->name;
767769

768770
dentry = create_dir_dentry(ei, ei_child, ei_dentry);
769771
if (!dentry)
770-
goto out;
772+
goto out_dec;
771773
ino = dentry->d_inode->i_ino;
772774
dput(dentry);
773775

774776
if (!dir_emit(ctx, name, strlen(name), ino, DT_DIR))
775-
goto out;
776-
ctx->pos++;
777+
goto out_dec;
777778
}
778779

779780
for (i = 0; i < ei->nr_entries; i++) {
@@ -784,14 +785,16 @@ static int eventfs_iterate(struct file *file, struct dir_context *ctx)
784785
continue;
785786
}
786787

788+
ctx->pos++;
789+
787790
entry = &ei->entries[i];
788791
name = entry->name;
789792

790793
mutex_lock(&eventfs_mutex);
791794
/* If ei->is_freed then just bail here, nothing more to do */
792795
if (ei->is_freed) {
793796
mutex_unlock(&eventfs_mutex);
794-
goto out;
797+
goto out_dec;
795798
}
796799
r = entry->callback(name, &mode, &cdata, &fops);
797800
mutex_unlock(&eventfs_mutex);
@@ -800,19 +803,23 @@ static int eventfs_iterate(struct file *file, struct dir_context *ctx)
800803

801804
dentry = create_file_dentry(ei, i, ei_dentry, name, mode, cdata, fops);
802805
if (!dentry)
803-
goto out;
806+
goto out_dec;
804807
ino = dentry->d_inode->i_ino;
805808
dput(dentry);
806809

807810
if (!dir_emit(ctx, name, strlen(name), ino, DT_REG))
808-
goto out;
809-
ctx->pos++;
811+
goto out_dec;
810812
}
811813
ret = 1;
812814
out:
813815
srcu_read_unlock(&eventfs_srcu, idx);
814816

815817
return ret;
818+
819+
out_dec:
820+
/* Incremented ctx->pos without adding something, reset it */
821+
ctx->pos--;
822+
goto out;
816823
}
817824

818825
/**

0 commit comments

Comments
 (0)