@@ -194,6 +194,8 @@ struct bpf_verifier_stack_elem {
 
 #define BPF_GLOBAL_PERCPU_MA_MAX_SIZE 512
 
+#define BPF_PRIV_STACK_MIN_SIZE 64
+
 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx);
 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id);
 static void invalidate_non_owning_refs(struct bpf_verifier_env *env);
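Note: the 64-byte floor defined here is consulted further down, in check_max_stack_depth_subprog(), where subprogs whose rounded stack depth falls below it keep using the normal kernel stack so that small frames do not pay the private-stack JIT setup cost.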
@@ -6090,6 +6092,34 @@ static int check_ptr_alignment(struct bpf_verifier_env *env,
 					   strict);
 }
 
+static enum priv_stack_mode bpf_enable_priv_stack(struct bpf_prog *prog)
+{
+	if (!bpf_jit_supports_private_stack())
+		return NO_PRIV_STACK;
+
+	/* bpf_prog_check_recur() checks all prog types that use a bpf
+	 * trampoline; kprobe/tp/perf_event/raw_tp don't use a trampoline,
+	 * hence they are checked explicitly here.
+	 */
+	switch (prog->type) {
+	case BPF_PROG_TYPE_KPROBE:
+	case BPF_PROG_TYPE_TRACEPOINT:
+	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+		return PRIV_STACK_ADAPTIVE;
+	case BPF_PROG_TYPE_TRACING:
+	case BPF_PROG_TYPE_LSM:
+	case BPF_PROG_TYPE_STRUCT_OPS:
+		if (bpf_prog_check_recur(prog))
+			return PRIV_STACK_ADAPTIVE;
+		fallthrough;
+	default:
+		break;
+	}
+
+	return NO_PRIV_STACK;
+}
+
 static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
 {
 	if (env->prog->jit_requested)
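For readers who want to trace the gating above in isolation, here is a minimal user-space sketch of the same decision tree. The enum values mirror the patch; `jit_ok` and `recur_safe` are hypothetical stand-ins for `bpf_jit_supports_private_stack()` and `bpf_prog_check_recur()`, which cannot be called outside the kernel.

```c
/* Minimal user-space model of the gating in bpf_enable_priv_stack().
 * jit_ok and recur_safe are hypothetical stand-ins for the kernel's
 * bpf_jit_supports_private_stack() and bpf_prog_check_recur().
 */
#include <stdbool.h>
#include <stdio.h>

enum priv_stack_mode { PRIV_STACK_UNKNOWN, NO_PRIV_STACK, PRIV_STACK_ADAPTIVE };
enum prog_type { KPROBE, TRACEPOINT, PERF_EVENT, RAW_TRACEPOINT,
		 TRACING, LSM, STRUCT_OPS, SOCKET_FILTER };

static enum priv_stack_mode enable_priv_stack(enum prog_type t, bool jit_ok,
					      bool recur_safe)
{
	if (!jit_ok)		/* the JIT must implement private stacks */
		return NO_PRIV_STACK;

	switch (t) {
	case KPROBE:
	case TRACEPOINT:
	case PERF_EVENT:
	case RAW_TRACEPOINT:
		/* no trampoline involved: eligible unconditionally */
		return PRIV_STACK_ADAPTIVE;
	case TRACING:
	case LSM:
	case STRUCT_OPS:
		/* trampoline-based: eligible only when recursion is
		 * prevented, mirroring the bpf_prog_check_recur() test */
		return recur_safe ? PRIV_STACK_ADAPTIVE : NO_PRIV_STACK;
	default:
		return NO_PRIV_STACK;
	}
}

int main(void)
{
	printf("%d\n", enable_priv_stack(KPROBE, true, false));  /* 2: adaptive */
	printf("%d\n", enable_priv_stack(TRACING, true, false)); /* 1: no priv stack */
	printf("%d\n", enable_priv_stack(LSM, true, true));      /* 2: adaptive */
	return 0;
}
```

The split mirrors the comment in the patch: trampoline-less tracing types are eligible unconditionally, while trampoline-based types must first pass the recursion check.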
@@ -6107,17 +6137,20 @@ static int round_up_stack_depth(struct bpf_verifier_env *env, int stack_depth)
  * Since recursion is prevented by check_cfg() this algorithm
  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
  */
-static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
+static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx,
+					 bool priv_stack_supported)
 {
 	struct bpf_subprog_info *subprog = env->subprog_info;
 	struct bpf_insn *insn = env->prog->insnsi;
-	int depth = 0, frame = 0, i, subprog_end;
+	int depth = 0, frame = 0, i, subprog_end, subprog_depth;
 	bool tail_call_reachable = false;
 	int ret_insn[MAX_CALL_FRAMES];
 	int ret_prog[MAX_CALL_FRAMES];
 	int j;
 
 	i = subprog[idx].start;
+	if (!priv_stack_supported)
+		subprog[idx].priv_stack_mode = NO_PRIV_STACK;
 process_func:
 	/* protect against potential stack overflow that might happen when
 	 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
@@ -6144,11 +6177,31 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 			depth);
 		return -EACCES;
 	}
-	depth += round_up_stack_depth(env, subprog[idx].stack_depth);
-	if (depth > MAX_BPF_STACK) {
-		verbose(env, "combined stack size of %d calls is %d. Too large\n",
-			frame + 1, depth);
-		return -EACCES;
+
+	subprog_depth = round_up_stack_depth(env, subprog[idx].stack_depth);
+	if (priv_stack_supported) {
+		/* Request private stack support only if the subprog stack
+		 * depth is no less than BPF_PRIV_STACK_MIN_SIZE. This is to
+		 * avoid the JIT penalty when stack usage is small.
+		 */
+		if (subprog[idx].priv_stack_mode == PRIV_STACK_UNKNOWN &&
+		    subprog_depth >= BPF_PRIV_STACK_MIN_SIZE)
+			subprog[idx].priv_stack_mode = PRIV_STACK_ADAPTIVE;
+	}
+
+	if (subprog[idx].priv_stack_mode == PRIV_STACK_ADAPTIVE) {
+		if (subprog_depth > MAX_BPF_STACK) {
+			verbose(env, "stack size of subprog %d is %d. Too large\n",
+				idx, subprog_depth);
+			return -EACCES;
+		}
+	} else {
+		depth += subprog_depth;
+		if (depth > MAX_BPF_STACK) {
+			verbose(env, "combined stack size of %d calls is %d. Too large\n",
+				frame + 1, depth);
+			return -EACCES;
+		}
 	}
 continue_func:
 	subprog_end = subprog[idx + 1].start;
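To see what the split accounting buys, consider a hypothetical chain in which the main prog needs 320 bytes and calls one subprog needing 256 bytes, with MAX_BPF_STACK at 512. Under the old combined accounting the walk fails (320 + 256 = 576 > 512); but if that subprog is marked PRIV_STACK_ADAPTIVE, it is only checked in isolation (256 <= 512) and contributes nothing to `depth`, so the same program is accepted.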
@@ -6205,6 +6258,8 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 		}
 		i = next_insn;
 		idx = sidx;
+		if (!priv_stack_supported)
+			subprog[idx].priv_stack_mode = NO_PRIV_STACK;
 
 		if (subprog[idx].has_tail_call)
 			tail_call_reachable = true;
@@ -6238,7 +6293,8 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 	 */
 	if (frame == 0)
 		return 0;
-	depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
+	if (subprog[idx].priv_stack_mode != PRIV_STACK_ADAPTIVE)
+		depth -= round_up_stack_depth(env, subprog[idx].stack_depth);
 	frame--;
 	i = ret_insn[frame];
 	idx = ret_prog[frame];
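The unwind path mirrors the accounting above: a PRIV_STACK_ADAPTIVE frame never added to `depth` on the way down, so nothing is subtracted for it on the way back up.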
@@ -6247,16 +6303,36 @@ static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
 
 static int check_max_stack_depth(struct bpf_verifier_env *env)
 {
+	enum priv_stack_mode priv_stack_mode = PRIV_STACK_UNKNOWN;
 	struct bpf_subprog_info *si = env->subprog_info;
+	bool priv_stack_supported;
 	int ret;
 
 	for (int i = 0; i < env->subprog_cnt; i++) {
+		if (si[i].has_tail_call) {
+			priv_stack_mode = NO_PRIV_STACK;
+			break;
+		}
+	}
+
+	if (priv_stack_mode == PRIV_STACK_UNKNOWN)
+		priv_stack_mode = bpf_enable_priv_stack(env->prog);
+
+	/* All async_cb subprogs use the normal kernel stack. If a subprog
+	 * appears in both the main prog subtree and an async_cb subtree,
+	 * it will use the normal kernel stack to avoid potential nesting.
+	 * The reverse subprog traversal ensures that, by the time the main
+	 * prog subtree is checked, any subprog appearing in an async_cb
+	 * subtree has already been marked as using the normal kernel
+	 * stack, so the stack size check can be done properly.
+	 */
+	for (int i = env->subprog_cnt - 1; i >= 0; i--) {
 		if (!i || si[i].is_async_cb) {
-			ret = check_max_stack_depth_subprog(env, i);
+			priv_stack_supported = !i && priv_stack_mode == PRIV_STACK_ADAPTIVE;
+			ret = check_max_stack_depth_subprog(env, i, priv_stack_supported);
 			if (ret < 0)
 				return ret;
 		}
-		continue;
 	}
 	return 0;
 }
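The effect of the reverse traversal is easiest to see on a concrete layout. The sketch below is a simplified stand-alone simulation, assuming subprog 2 is called from both the main prog (subprog 0) and an async_cb (subprog 1); the single-callee `struct subprog` and the `walk()` helper are illustrative simplifications, not the verifier's data structures.

```c
#include <stdbool.h>
#include <stdio.h>

enum priv_stack_mode { PRIV_STACK_UNKNOWN, NO_PRIV_STACK, PRIV_STACK_ADAPTIVE };

struct subprog {
	bool is_async_cb;
	int callee;			/* single callee index, -1 if none */
	enum priv_stack_mode mode;
};

/* Walk one call-chain subtree, pinning every visited subprog to the
 * normal kernel stack when private stacks are not allowed there. */
static void walk(struct subprog *sp, int idx, bool priv_ok)
{
	for (; idx != -1; idx = sp[idx].callee)
		if (!priv_ok)
			sp[idx].mode = NO_PRIV_STACK;
}

int main(void)
{
	/* subprog 0 = main, 1 = async_cb, both call shared subprog 2 */
	struct subprog sp[3] = {
		{ false,  2, PRIV_STACK_UNKNOWN },
		{ true,   2, PRIV_STACK_UNKNOWN },
		{ false, -1, PRIV_STACK_UNKNOWN },
	};

	/* Reverse order: the async_cb subtree (i = 1) is walked first and
	 * pins subprog 2 to NO_PRIV_STACK before the main subtree (i = 0,
	 * the only one where private stacks are allowed) is checked. */
	for (int i = 2; i >= 0; i--)
		if (!i || sp[i].is_async_cb)
			walk(sp, i, /* priv_ok */ i == 0);

	printf("subprog 2 mode: %d\n", sp[2].mode);	/* 1: NO_PRIV_STACK */
	return 0;
}
```

Because i = 1 is visited before i = 0, the shared subprog is already pinned to the normal kernel stack when the main-prog walk reaches it, so the main subtree's depth accounting includes it instead of exempting it.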