Message ID | 159481854255.454654.15065796817034016611.stgit@toke.dk |
---|---|
State | Changes Requested |
Delegated to: | BPF Maintainers |
Headers | show |
Series | bpf: Support multi-attach for freplace programs | expand |
On Wed, Jul 15, 2020 at 03:09:02PM +0200, Toke Høiland-Jørgensen wrote: > > + if (tgt_prog_fd) { > + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ > + if (prog->type != BPF_PROG_TYPE_EXT || > + !btf_id) { > + err = -EINVAL; > + goto out_put_prog; > + } > + tgt_prog = bpf_prog_get(tgt_prog_fd); > + if (IS_ERR(tgt_prog)) { > + err = PTR_ERR(tgt_prog); > + tgt_prog = NULL; > + goto out_put_prog; > + } > + > + } else if (btf_id) { > + err = -EINVAL; > + goto out_put_prog; > + } else { > + btf_id = prog->aux->attach_btf_id; > + tgt_prog = prog->aux->linked_prog; > + if (tgt_prog) > + bpf_prog_inc(tgt_prog); /* we call bpf_prog_put() on link release */ so the first prog_load cmd will be holding the first target prog? This is a complete non-starter. You didn't mention such a decision anywhere. The first ext prog will attach to the first dispatcher xdp prog, then that ext prog will multi attach to second dispatcher xdp prog and the first dispatcher prog will live in the kernel forever. That's not what we discussed back in April. > + } > + err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, > + &fmodel, &addr, NULL, NULL); This is a second check for btf id match? What's the point? The first one was done at load time. When tgt_prog_fd/tgt_btf_id are zero there is no need to recheck. I really hope I'm misreading these patches, because they look very raw.
Hi Toke, url: https://github.com/0day-ci/linux/commits/Toke-H-iland-J-rgensen/bpf-Support-multi-attach-for-freplace-programs/20200715-211145 base: https://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git master config: x86_64-randconfig-m001-20200715 (attached as .config) compiler: gcc-9 (Debian 9.3.0-14) 9.3.0 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> Reported-by: Dan Carpenter <dan.carpenter@oracle.com> New smatch warnings: kernel/bpf/verifier.c:10900 bpf_check_attach_target() error: we previously assumed 'tgt_prog' could be null (see line 10772) Old smatch warnings: include/linux/bpf_verifier.h:351 bpf_verifier_log_needed() error: we previously assumed 'log' could be null (see line 350) # https://github.com/0day-ci/linux/commit/cc8571ec751a3a6065838e0b15105f8be0ced6fe git remote add linux-review https://github.com/0day-ci/linux git remote update linux-review git checkout cc8571ec751a3a6065838e0b15105f8be0ced6fe vim +/tgt_prog +10900 kernel/bpf/verifier.c c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10734 int bpf_check_attach_target(struct bpf_verifier_log *log, c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10735 const struct bpf_prog *prog, c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10736 const struct bpf_prog *tgt_prog, c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10737 u32 btf_id, c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10738 struct btf_func_model *fmodel, c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10739 long *tgt_addr, c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10740 const char **tgt_name, c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10741 const struct btf_type **tgt_type) 38207291604401 Martin KaFai Lau 2019-10-24 10742 { be8704ff07d237 Alexei Starovoitov 2020-01-20 10743 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10744 const char prefix[] = "btf_trace_"; 5b92a28aae4dd0 Alexei 
Starovoitov 2019-11-14 10745 int ret = 0, subprog = -1, i; 38207291604401 Martin KaFai Lau 2019-10-24 10746 const struct btf_type *t; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10747 bool conservative = true; 38207291604401 Martin KaFai Lau 2019-10-24 10748 const char *tname; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10749 struct btf *btf; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10750 long addr = 0; 38207291604401 Martin KaFai Lau 2019-10-24 10751 f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10752 if (!btf_id) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10753 bpf_log(log, "Tracing programs must provide btf_id\n"); f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10754 return -EINVAL; f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10755 } 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10756 btf = bpf_prog_get_target_btf(prog); 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10757 if (!btf) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10758 bpf_log(log, 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10759 "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10760 return -EINVAL; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10761 } 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10762 t = btf_type_by_id(btf, btf_id); 38207291604401 Martin KaFai Lau 2019-10-24 10763 if (!t) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10764 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id); 38207291604401 Martin KaFai Lau 2019-10-24 10765 return -EINVAL; 38207291604401 Martin KaFai Lau 2019-10-24 10766 } 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10767 tname = btf_name_by_offset(btf, t->name_off); f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10768 if (!tname) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10769 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id); f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10770 return -EINVAL; f1b9509c2fb0ef Alexei Starovoitov 
2019-10-30 10771 } 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 @10772 if (tgt_prog) { 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10773 struct bpf_prog_aux *aux = tgt_prog->aux; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10774 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10775 for (i = 0; i < aux->func_info_cnt; i++) 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10776 if (aux->func_info[i].type_id == btf_id) { 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10777 subprog = i; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10778 break; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10779 } 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10780 if (subprog == -1) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10781 bpf_log(log, "Subprog %s doesn't exist\n", tname); 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10782 return -EINVAL; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10783 } 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10784 conservative = aux->func_info_aux[subprog].unreliable; be8704ff07d237 Alexei Starovoitov 2020-01-20 10785 if (prog_extension) { be8704ff07d237 Alexei Starovoitov 2020-01-20 10786 if (conservative) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10787 bpf_log(log, be8704ff07d237 Alexei Starovoitov 2020-01-20 10788 "Cannot replace static functions\n"); be8704ff07d237 Alexei Starovoitov 2020-01-20 10789 return -EINVAL; be8704ff07d237 Alexei Starovoitov 2020-01-20 10790 } be8704ff07d237 Alexei Starovoitov 2020-01-20 10791 if (!prog->jit_requested) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10792 bpf_log(log, be8704ff07d237 Alexei Starovoitov 2020-01-20 10793 "Extension programs should be JITed\n"); be8704ff07d237 Alexei Starovoitov 2020-01-20 10794 return -EINVAL; be8704ff07d237 Alexei Starovoitov 2020-01-20 10795 } be8704ff07d237 Alexei Starovoitov 2020-01-20 10796 } be8704ff07d237 Alexei Starovoitov 2020-01-20 10797 if (!tgt_prog->jited) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10798 bpf_log(log, "Can attach to only JITed 
progs\n"); be8704ff07d237 Alexei Starovoitov 2020-01-20 10799 return -EINVAL; be8704ff07d237 Alexei Starovoitov 2020-01-20 10800 } be8704ff07d237 Alexei Starovoitov 2020-01-20 10801 if (tgt_prog->type == prog->type) { be8704ff07d237 Alexei Starovoitov 2020-01-20 10802 /* Cannot fentry/fexit another fentry/fexit program. be8704ff07d237 Alexei Starovoitov 2020-01-20 10803 * Cannot attach program extension to another extension. be8704ff07d237 Alexei Starovoitov 2020-01-20 10804 * It's ok to attach fentry/fexit to extension program. be8704ff07d237 Alexei Starovoitov 2020-01-20 10805 */ e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10806 bpf_log(log, "Cannot recursively attach\n"); be8704ff07d237 Alexei Starovoitov 2020-01-20 10807 return -EINVAL; be8704ff07d237 Alexei Starovoitov 2020-01-20 10808 } be8704ff07d237 Alexei Starovoitov 2020-01-20 10809 if (tgt_prog->type == BPF_PROG_TYPE_TRACING && be8704ff07d237 Alexei Starovoitov 2020-01-20 10810 prog_extension && be8704ff07d237 Alexei Starovoitov 2020-01-20 10811 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || be8704ff07d237 Alexei Starovoitov 2020-01-20 10812 tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) { be8704ff07d237 Alexei Starovoitov 2020-01-20 10813 /* Program extensions can extend all program types be8704ff07d237 Alexei Starovoitov 2020-01-20 10814 * except fentry/fexit. The reason is the following. be8704ff07d237 Alexei Starovoitov 2020-01-20 10815 * The fentry/fexit programs are used for performance be8704ff07d237 Alexei Starovoitov 2020-01-20 10816 * analysis, stats and can be attached to any program be8704ff07d237 Alexei Starovoitov 2020-01-20 10817 * type except themselves. When extension program is be8704ff07d237 Alexei Starovoitov 2020-01-20 10818 * replacing XDP function it is necessary to allow be8704ff07d237 Alexei Starovoitov 2020-01-20 10819 * performance analysis of all functions. 
Both original be8704ff07d237 Alexei Starovoitov 2020-01-20 10820 * XDP program and its program extension. Hence be8704ff07d237 Alexei Starovoitov 2020-01-20 10821 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is be8704ff07d237 Alexei Starovoitov 2020-01-20 10822 * allowed. If extending of fentry/fexit was allowed it be8704ff07d237 Alexei Starovoitov 2020-01-20 10823 * would be possible to create long call chain be8704ff07d237 Alexei Starovoitov 2020-01-20 10824 * fentry->extension->fentry->extension beyond be8704ff07d237 Alexei Starovoitov 2020-01-20 10825 * reasonable stack size. Hence extending fentry is not be8704ff07d237 Alexei Starovoitov 2020-01-20 10826 * allowed. be8704ff07d237 Alexei Starovoitov 2020-01-20 10827 */ e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10828 bpf_log(log, "Cannot extend fentry/fexit\n"); be8704ff07d237 Alexei Starovoitov 2020-01-20 10829 return -EINVAL; be8704ff07d237 Alexei Starovoitov 2020-01-20 10830 } 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10831 } else { be8704ff07d237 Alexei Starovoitov 2020-01-20 10832 if (prog_extension) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10833 bpf_log(log, "Cannot replace kernel functions\n"); be8704ff07d237 Alexei Starovoitov 2020-01-20 10834 return -EINVAL; be8704ff07d237 Alexei Starovoitov 2020-01-20 10835 } 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10836 } f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10837 f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10838 switch (prog->expected_attach_type) { f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10839 case BPF_TRACE_RAW_TP: 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10840 if (tgt_prog) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10841 bpf_log(log, 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10842 "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10843 return -EINVAL; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10844 } 38207291604401 Martin KaFai Lau 2019-10-24 
10845 if (!btf_type_is_typedef(t)) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10846 bpf_log(log, "attach_btf_id %u is not a typedef\n", 38207291604401 Martin KaFai Lau 2019-10-24 10847 btf_id); 38207291604401 Martin KaFai Lau 2019-10-24 10848 return -EINVAL; 38207291604401 Martin KaFai Lau 2019-10-24 10849 } f1b9509c2fb0ef Alexei Starovoitov 2019-10-30 10850 if (strncmp(prefix, tname, sizeof(prefix) - 1)) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10851 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n", 38207291604401 Martin KaFai Lau 2019-10-24 10852 btf_id, tname); 38207291604401 Martin KaFai Lau 2019-10-24 10853 return -EINVAL; 38207291604401 Martin KaFai Lau 2019-10-24 10854 } 38207291604401 Martin KaFai Lau 2019-10-24 10855 tname += sizeof(prefix) - 1; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10856 t = btf_type_by_id(btf, t->type); 38207291604401 Martin KaFai Lau 2019-10-24 10857 if (!btf_type_is_ptr(t)) 38207291604401 Martin KaFai Lau 2019-10-24 10858 /* should never happen in valid vmlinux build */ 38207291604401 Martin KaFai Lau 2019-10-24 10859 return -EINVAL; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10860 t = btf_type_by_id(btf, t->type); 38207291604401 Martin KaFai Lau 2019-10-24 10861 if (!btf_type_is_func_proto(t)) 38207291604401 Martin KaFai Lau 2019-10-24 10862 /* should never happen in valid vmlinux build */ 38207291604401 Martin KaFai Lau 2019-10-24 10863 return -EINVAL; 38207291604401 Martin KaFai Lau 2019-10-24 10864 c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10865 break; 15d83c4d7cef5c Yonghong Song 2020-05-09 10866 case BPF_TRACE_ITER: 15d83c4d7cef5c Yonghong Song 2020-05-09 10867 if (!btf_type_is_func(t)) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10868 bpf_log(log, "attach_btf_id %u is not a function\n", 15d83c4d7cef5c Yonghong Song 2020-05-09 10869 btf_id); 15d83c4d7cef5c Yonghong Song 2020-05-09 10870 return -EINVAL; 15d83c4d7cef5c Yonghong Song 2020-05-09 10871 } 15d83c4d7cef5c Yonghong 
Song 2020-05-09 10872 t = btf_type_by_id(btf, t->type); 15d83c4d7cef5c Yonghong Song 2020-05-09 10873 if (!btf_type_is_func_proto(t)) 15d83c4d7cef5c Yonghong Song 2020-05-09 10874 return -EINVAL; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10875 ret = btf_distill_func_proto(log, btf, t, tname, fmodel); c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10876 if (ret) 15d83c4d7cef5c Yonghong Song 2020-05-09 10877 return ret; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10878 break; be8704ff07d237 Alexei Starovoitov 2020-01-20 10879 default: be8704ff07d237 Alexei Starovoitov 2020-01-20 10880 if (!prog_extension) be8704ff07d237 Alexei Starovoitov 2020-01-20 10881 return -EINVAL; be8704ff07d237 Alexei Starovoitov 2020-01-20 10882 /* fallthrough */ ae24082331d9bb KP Singh 2020-03-04 10883 case BPF_MODIFY_RETURN: 9e4e01dfd3254c KP Singh 2020-03-29 10884 case BPF_LSM_MAC: fec56f5890d93f Alexei Starovoitov 2019-11-14 10885 case BPF_TRACE_FENTRY: fec56f5890d93f Alexei Starovoitov 2019-11-14 10886 case BPF_TRACE_FEXIT: fec56f5890d93f Alexei Starovoitov 2019-11-14 10887 if (!btf_type_is_func(t)) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10888 bpf_log(log, "attach_btf_id %u is not a function\n", fec56f5890d93f Alexei Starovoitov 2019-11-14 10889 btf_id); fec56f5890d93f Alexei Starovoitov 2019-11-14 10890 return -EINVAL; fec56f5890d93f Alexei Starovoitov 2019-11-14 10891 } be8704ff07d237 Alexei Starovoitov 2020-01-20 10892 if (prog_extension && e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10893 btf_check_type_match(log, prog, btf, t)) be8704ff07d237 Alexei Starovoitov 2020-01-20 10894 return -EINVAL; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10895 t = btf_type_by_id(btf, t->type); fec56f5890d93f Alexei Starovoitov 2019-11-14 10896 if (!btf_type_is_func_proto(t)) fec56f5890d93f Alexei Starovoitov 2019-11-14 10897 return -EINVAL; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10898 cc8571ec751a3a Toke Høiland-Jørgensen 2020-07-15 10899 if 
((prog->aux->tgt_prog_type && cc8571ec751a3a Toke Høiland-Jørgensen 2020-07-15 @10900 prog->aux->tgt_prog_type != tgt_prog->type) || ^^^^^^^^^^^^^^ cc8571ec751a3a Toke Høiland-Jørgensen 2020-07-15 10901 (prog->aux->tgt_attach_type && cc8571ec751a3a Toke Høiland-Jørgensen 2020-07-15 10902 prog->aux->tgt_attach_type != tgt_prog->expected_attach_type)) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Not checked. cc8571ec751a3a Toke Høiland-Jørgensen 2020-07-15 10903 return -EINVAL; cc8571ec751a3a Toke Høiland-Jørgensen 2020-07-15 10904 c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10905 if (tgt_prog && conservative) 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10906 t = NULL; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10907 c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10908 ret = btf_distill_func_proto(log, btf, t, tname, fmodel); fec56f5890d93f Alexei Starovoitov 2019-11-14 10909 if (ret < 0) c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10910 return ret; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10911 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10912 if (tgt_prog) { e9eeec58c992c4 Yonghong Song 2019-12-04 10913 if (subprog == 0) e9eeec58c992c4 Yonghong Song 2019-12-04 10914 addr = (long) tgt_prog->bpf_func; e9eeec58c992c4 Yonghong Song 2019-12-04 10915 else 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10916 addr = (long) tgt_prog->aux->func[subprog]->bpf_func; 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 10917 } else { fec56f5890d93f Alexei Starovoitov 2019-11-14 10918 addr = kallsyms_lookup_name(tname); fec56f5890d93f Alexei Starovoitov 2019-11-14 10919 if (!addr) { e33243ff1dd2cb Toke Høiland-Jørgensen 2020-07-15 10920 bpf_log(log, fec56f5890d93f Alexei Starovoitov 2019-11-14 10921 "The address of function %s cannot be found\n", fec56f5890d93f Alexei Starovoitov 2019-11-14 10922 tname); c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10923 return -ENOENT; fec56f5890d93f Alexei Starovoitov 2019-11-14 10924 } 5b92a28aae4dd0 Alexei Starovoitov 2019-11-14 
10925 } c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10926 break; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10927 } 18644cec714aab Alexei Starovoitov 2020-05-28 10928 c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10929 *tgt_addr = addr; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10930 if (tgt_name) c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10931 *tgt_name = tname; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10932 if (tgt_type) c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10933 *tgt_type = t; c2d0f6ffe7709e Toke Høiland-Jørgensen 2020-07-15 10934 return 0; 18644cec714aab Alexei Starovoitov 2020-05-28 10935 } --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Alexei Starovoitov <alexei.starovoitov@gmail.com> writes: > On Wed, Jul 15, 2020 at 03:09:02PM +0200, Toke Høiland-Jørgensen wrote: >> >> + if (tgt_prog_fd) { >> + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ >> + if (prog->type != BPF_PROG_TYPE_EXT || >> + !btf_id) { >> + err = -EINVAL; >> + goto out_put_prog; >> + } >> + tgt_prog = bpf_prog_get(tgt_prog_fd); >> + if (IS_ERR(tgt_prog)) { >> + err = PTR_ERR(tgt_prog); >> + tgt_prog = NULL; >> + goto out_put_prog; >> + } >> + >> + } else if (btf_id) { >> + err = -EINVAL; >> + goto out_put_prog; >> + } else { >> + btf_id = prog->aux->attach_btf_id; >> + tgt_prog = prog->aux->linked_prog; >> + if (tgt_prog) >> + bpf_prog_inc(tgt_prog); /* we call bpf_prog_put() on link release */ > > so the first prog_load cmd will beholding the first target prog? > This is complete non starter. > You didn't mention such decision anywhere. > The first ext prog will attach to the first dispatcher xdp prog, > then that ext prog will multi attach to second dispatcher xdp prog and > the first dispatcher prog will live in the kernel forever. Huh, yeah, you're right that's no good. Missing that was a think-o on my part, sorry about that :/ > That's not what we discussed back in April. No, you mentioned turning aux->linked_prog into a list. However once I started looking at it I figured it was better to actually have all this (the trampoline and ref) as part of the bpf_link structure, since logically they're related. But as you pointed out, the original reference sticks. So either that needs to be removed, or I need to go back to the 'aux->linked_progs as a list' idea. Any preference? >> + } >> + err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, >> + &fmodel, &addr, NULL, NULL); > > This is a second check for btf id match? > What's the point? The first one was done at load time. > When tgt_prog_fd/tgt_btf_id are zero there is no need to recheck. 
It's not strictly needed if tgt_prog/btf_id is not set, but it doesn't hurt either; and it was convenient to reuse it to resolve the func addr for the trampoline + it means everything goes through the same code path. > I really hope I'm misreading these patches, because they look very raw. I don't think you are. I'll admit to them being a bit raw, but this was as far as I got and since I'll be away for three weeks I figured it was better to post them in case anyone else was interested in playing with it. So if anyone wants to pick these patches up while I'm gone, feel free; otherwise, I'll get back to it after my vacation :) -Toke
On Thu, Jul 16, 2020 at 12:50:05PM +0200, Toke Høiland-Jørgensen wrote: > Alexei Starovoitov <alexei.starovoitov@gmail.com> writes: > > > On Wed, Jul 15, 2020 at 03:09:02PM +0200, Toke Høiland-Jørgensen wrote: > >> > >> + if (tgt_prog_fd) { > >> + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ > >> + if (prog->type != BPF_PROG_TYPE_EXT || > >> + !btf_id) { > >> + err = -EINVAL; > >> + goto out_put_prog; > >> + } > >> + tgt_prog = bpf_prog_get(tgt_prog_fd); > >> + if (IS_ERR(tgt_prog)) { > >> + err = PTR_ERR(tgt_prog); > >> + tgt_prog = NULL; > >> + goto out_put_prog; > >> + } > >> + > >> + } else if (btf_id) { > >> + err = -EINVAL; > >> + goto out_put_prog; > >> + } else { > >> + btf_id = prog->aux->attach_btf_id; > >> + tgt_prog = prog->aux->linked_prog; > >> + if (tgt_prog) > >> + bpf_prog_inc(tgt_prog); /* we call bpf_prog_put() on link release */ > > > > so the first prog_load cmd will beholding the first target prog? > > This is complete non starter. > > You didn't mention such decision anywhere. > > The first ext prog will attach to the first dispatcher xdp prog, > > then that ext prog will multi attach to second dispatcher xdp prog and > > the first dispatcher prog will live in the kernel forever. > > Huh, yeah, you're right that's no good. Missing that was a think-o on my > part, sorry about that :/ > > > That's not what we discussed back in April. > > No, you mentioned turning aux->linked_prog into a list. However once I > started looking at it I figured it was better to actually have all this > (the trampoline and ref) as part of the bpf_link structure, since > logically they're related. > > But as you pointed out, the original reference sticks. So either that > needs to be removed, or I need to go back to the 'aux->linked_progs as a > list' idea. Any preference? Good question. 
Back then I was thinking about converting linked_prog into linked list, since standalone single linked_prog is quite odd, because attaching ext prog to multiple tgt progs should have equivalent properties across all attachments. Back then bpf_link wasn't quite developed. Now I feel moving into bpf_tracing_link is better. I guess a linked list of bpf_tracing_link-s from 'struct bpf_prog' might work. At prog load time we can do bpf_link_init() only (without doing bpf_link_prime) and keep this pre-populated bpf_link with target bpf prog and trampoline in a linked list accessed from 'struct bpf_prog'. Then bpf_tracing_prog_attach() without extra tgt_prog_fd/btf_id would complete that bpf_tracing_link by calling bpf_link_prime() and bpf_link_settle() without allocating a new one. Something like: struct bpf_tracing_link { struct bpf_link link; /* ext prog pointer is hiding in there */ enum bpf_attach_type attach_type; struct bpf_trampoline *tr; struct bpf_prog *tgt_prog; /* old aux->linked_prog */ }; ext prog -> aux -> linked list of above bpf_tracing_link-s It's a circular reference, obviously. Need to think through the complications and locking. bpf_tracing_prog_attach() with tgt_prog_fd/btf_id will alloc new bpf_tracing_link and will add it to a linked list. Just a rough idea. I wonder what Andrii thinks. > > >> + } > >> + err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, > >> + &fmodel, &addr, NULL, NULL); > > > > This is a second check for btf id match? > > What's the point? The first one was done at load time. > > When tgt_prog_fd/tgt_btf_id are zero there is no need to recheck. > > It's not strictly needed if tgt_prog/btf_id is not set, but it doesn't > hurt either; and it was convenient to reuse it to resolve the func addr > for the trampoline + it means everything goes through the same code path. Doing the same work twice is a sign that this function needs to be split into more than 3 helpers, so the work is not repeated. 
> > > I really hope I'm misreading these patches, because they look very raw. > > I don't think you are. I'll admit to them being a bit raw, but this was > as far as I got and since I'll be away for three weeks I figured it was > better to post them in case anyone else was interested in playing with > it. Since it was v2 I figured you want it to land and it's ready. Next time please mention the state of patches. It's absolutely fine to post raw patches. It's fine to post stuff that doesn't compile. But please explain the state in commit logs or cover.
Alexei Starovoitov <alexei.starovoitov@gmail.com> writes: > On Thu, Jul 16, 2020 at 12:50:05PM +0200, Toke Høiland-Jørgensen wrote: >> Alexei Starovoitov <alexei.starovoitov@gmail.com> writes: >> >> > On Wed, Jul 15, 2020 at 03:09:02PM +0200, Toke Høiland-Jørgensen wrote: >> >> >> >> + if (tgt_prog_fd) { >> >> + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ >> >> + if (prog->type != BPF_PROG_TYPE_EXT || >> >> + !btf_id) { >> >> + err = -EINVAL; >> >> + goto out_put_prog; >> >> + } >> >> + tgt_prog = bpf_prog_get(tgt_prog_fd); >> >> + if (IS_ERR(tgt_prog)) { >> >> + err = PTR_ERR(tgt_prog); >> >> + tgt_prog = NULL; >> >> + goto out_put_prog; >> >> + } >> >> + >> >> + } else if (btf_id) { >> >> + err = -EINVAL; >> >> + goto out_put_prog; >> >> + } else { >> >> + btf_id = prog->aux->attach_btf_id; >> >> + tgt_prog = prog->aux->linked_prog; >> >> + if (tgt_prog) >> >> + bpf_prog_inc(tgt_prog); /* we call bpf_prog_put() on link release */ >> > >> > so the first prog_load cmd will beholding the first target prog? >> > This is complete non starter. >> > You didn't mention such decision anywhere. >> > The first ext prog will attach to the first dispatcher xdp prog, >> > then that ext prog will multi attach to second dispatcher xdp prog and >> > the first dispatcher prog will live in the kernel forever. >> >> Huh, yeah, you're right that's no good. Missing that was a think-o on my >> part, sorry about that :/ >> >> > That's not what we discussed back in April. >> >> No, you mentioned turning aux->linked_prog into a list. However once I >> started looking at it I figured it was better to actually have all this >> (the trampoline and ref) as part of the bpf_link structure, since >> logically they're related. >> >> But as you pointed out, the original reference sticks. So either that >> needs to be removed, or I need to go back to the 'aux->linked_progs as a >> list' idea. Any preference? > > Good question. 
Back then I was thinking about converting linked_prog into link > list, since standalone single linked_prog is quite odd, because attaching ext > prog to multiple tgt progs should have equivalent properties across all > attachments. > Back then bpf_link wasn't quite developed. > Now I feel moving into bpf_tracing_link is better. > I guess a link list of bpf_tracing_link-s from 'struct bpf_prog' might work. > At prog load time we can do bpf_link_init() only (without doing bpf_link_prime) > and keep this pre-populated bpf_link with target bpf prog and trampoline > in a link list accessed from 'struct bpf_prog'. > Then bpf_tracing_prog_attach() without extra tgt_prog_fd/btf_id would complete > that bpf_tracing_link by calling bpf_link_prime() and bpf_link_settle() > without allocating new one. > Something like: > struct bpf_tracing_link { > struct bpf_link link; /* ext prog pointer is hidding in there */ > enum bpf_attach_type attach_type; > struct bpf_trampoline *tr; > struct bpf_prog *tgt_prog; /* old aux->linked_prog */ > }; > > ext prog -> aux -> link list of above bpf_tracing_link-s Yeah, I thought along these lines as well (was thinking a new struct referenced from bpf_tracing_link, but sure, why not just stick the whole thing into aux?). > It's a circular reference, obviously. > Need to think through the complications and locking. Yup, will do so when I get back to this. One other implication of this change: If we make the linked_prog completely dynamic you can no longer do: link_fd = bpf_raw_tracepoint_open(prog); close(link_fd); link_fd = bpf_raw_tracepoint_open(prog); since after that close(), the original linked_prog will be gone. Unless we always leave at least one linked_prog alive? But then we can't guarantee that it's the target that was supplied on program load if it was reattached. Is that acceptable? >> I don't think you are. 
I'll admit to them being a bit raw, but this was >> as far as I got and since I'll be away for three weeks I figured it was >> better to post them in case anyone else was interested in playing with >> it. > > Since it was v2 I figured you want it to land and it's ready. > Next time please mention the state of patches. > It's absolutely fine to post raw patches. It's fine to post stuff > that doesn't compile. But please explain the state in commit logs or cover. Right, sorry that was not clear; will make sure to spell it out next time. -Toke
On Thu, Jul 16, 2020 at 7:06 PM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Thu, Jul 16, 2020 at 12:50:05PM +0200, Toke Høiland-Jørgensen wrote: > > Alexei Starovoitov <alexei.starovoitov@gmail.com> writes: > > > > > On Wed, Jul 15, 2020 at 03:09:02PM +0200, Toke Høiland-Jørgensen wrote: > > >> > > >> + if (tgt_prog_fd) { > > >> + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ > > >> + if (prog->type != BPF_PROG_TYPE_EXT || > > >> + !btf_id) { > > >> + err = -EINVAL; > > >> + goto out_put_prog; > > >> + } > > >> + tgt_prog = bpf_prog_get(tgt_prog_fd); > > >> + if (IS_ERR(tgt_prog)) { > > >> + err = PTR_ERR(tgt_prog); > > >> + tgt_prog = NULL; > > >> + goto out_put_prog; > > >> + } > > >> + > > >> + } else if (btf_id) { > > >> + err = -EINVAL; > > >> + goto out_put_prog; > > >> + } else { > > >> + btf_id = prog->aux->attach_btf_id; > > >> + tgt_prog = prog->aux->linked_prog; > > >> + if (tgt_prog) > > >> + bpf_prog_inc(tgt_prog); /* we call bpf_prog_put() on link release */ > > > > > > so the first prog_load cmd will beholding the first target prog? > > > This is complete non starter. > > > You didn't mention such decision anywhere. > > > The first ext prog will attach to the first dispatcher xdp prog, > > > then that ext prog will multi attach to second dispatcher xdp prog and > > > the first dispatcher prog will live in the kernel forever. > > > > Huh, yeah, you're right that's no good. Missing that was a think-o on my > > part, sorry about that :/ > > > > > That's not what we discussed back in April. > > > > No, you mentioned turning aux->linked_prog into a list. However once I > > started looking at it I figured it was better to actually have all this > > (the trampoline and ref) as part of the bpf_link structure, since > > logically they're related. > > > > But as you pointed out, the original reference sticks. So either that > > needs to be removed, or I need to go back to the 'aux->linked_progs as a > > list' idea. 
Any preference? > > Good question. Back then I was thinking about converting linked_prog into link > list, since standalone single linked_prog is quite odd, because attaching ext > prog to multiple tgt progs should have equivalent properties across all > attachments. > Back then bpf_link wasn't quite developed. > Now I feel moving into bpf_tracing_link is better. > I guess a link list of bpf_tracing_link-s from 'struct bpf_prog' might work. > At prog load time we can do bpf_link_init() only (without doing bpf_link_prime) > and keep this pre-populated bpf_link with target bpf prog and trampoline > in a link list accessed from 'struct bpf_prog'. > Then bpf_tracing_prog_attach() without extra tgt_prog_fd/btf_id would complete > that bpf_tracing_link by calling bpf_link_prime() and bpf_link_settle() > without allocating new one. > Something like: > struct bpf_tracing_link { > struct bpf_link link; /* ext prog pointer is hidding in there */ > enum bpf_attach_type attach_type; > struct bpf_trampoline *tr; > struct bpf_prog *tgt_prog; /* old aux->linked_prog */ > }; > > ext prog -> aux -> link list of above bpf_tracing_link-s > > It's a circular reference, obviously. > Need to think through the complications and locking. > > bpf_tracing_prog_attach() with tgt_prog_fd/btf_id will alloc new bpf_tracing_link > and will add it to a link list. > > Just a rough idea. I wonder what Andrii thinks. > I need to spend more time reading existing and new code to see all the details, but I'll throw a slightly different proposal and let you guys shoot it down. So, what if instead of having linked_prog (as bpf_prog *, refcnt'ed), at BPF_PROG_LOAD time we just record the target prog's ID. BPF verifier, when doing its target prog checks would attempt to get bpf_prog * reference; if by that time the target program is gone, fail, of course. If not, everything proceeds as is, at the end of verification target_prog is put until attach time. 
Then at attach time, we either go with pre-recorded (in prog->aux->linked_prog_id) target prog's ID or we get a new one from RAW_TP_OPEN tgt_prog_fd. Either way, we bump refcnt on that target prog and keep it with bpf_tracing_link (so link on detach would put target_prog, that way it doesn't go away while EXT prog is attached). Then do all the compatibility checks, and if everything works out, bpf_tracing_link gets created, we record trampoline there, etc, etc. Basically, instead of having an EXT prog holding a reference to the target prog, only attachment (bpf_link) does that, which conceptually also seems to make more sense to me. For verification we store prog ID and don't hold target prog at all. Now, there will be a problem once you attach EXT prog to a new XDP root program and release a link against the original XDP root program. First, I hope I understand the desired sequence right, here's an example: 1. load XDP root prog X 2. load EXT prog with target prog X 3. attach EXT prog to prog X 4. load XDP root prog Y 5. attach EXT prog to prog Y (Y and X should be "compatible") 6. detach prog X (close bpf_link) Is that the right sequence? If yes, then the problem with storing ID of prog X in EXT prog->aux->linked_prog_id is that you won't be able to re-attach to new prog Z, because there won't be anything to check compatibility against (prog X will be long time gone). So we can do two things here: 1. on attach, replace ext_prog->aux->linked_prog_id with the latest attached prog (prog Y ID from above example) 2. instead of recording target program FD/ID, capture BTF FD and/or enough BTF information for checking compatibility. Approach 2) seems like conceptually the right thing to do (record type info we care about, not an **instance** of BPF program, compatible with that type info), but technically might be harder. That's my thoughts without digging too deep, so sorry if I'm making some stupid assumptions. [...]
On Fri, Jul 17, 2020 at 12:52:10PM +0200, Toke Høiland-Jørgensen wrote: > > > It's a circular reference, obviously. > > Need to think through the complications and locking. > > Yup, will do so when I get back to this. One other implication of this > change: If we make the linked_prog completely dynamic you can no longer > do: > > link_fd = bpf_raw_tracepoint_open(prog); > close(link_fd); > link_fd = bpf_raw_tracepoint_open(prog); > > since after that close(), the original linked_prog will be gone. Unless > we always leave at least one linked_prog alive? But then we can't > guarantee that it's the target that was supplied on program load if it > was reattached. Is that acceptable? I think both options are fine. We can start with the simple case where close would destroy the last link and if somebody complains we can keep 'at least one alive'. This is such a low-level implementation detail that I don't think any user can reliably count on it staying this way.
On Sun, Jul 19, 2020 at 10:02:48PM -0700, Andrii Nakryiko wrote: > On Thu, Jul 16, 2020 at 7:06 PM Alexei Starovoitov > <alexei.starovoitov@gmail.com> wrote: > > > > On Thu, Jul 16, 2020 at 12:50:05PM +0200, Toke Høiland-Jørgensen wrote: > > > Alexei Starovoitov <alexei.starovoitov@gmail.com> writes: > > > > > > > On Wed, Jul 15, 2020 at 03:09:02PM +0200, Toke Høiland-Jørgensen wrote: > > > >> > > > >> + if (tgt_prog_fd) { > > > >> + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ > > > >> + if (prog->type != BPF_PROG_TYPE_EXT || > > > >> + !btf_id) { > > > >> + err = -EINVAL; > > > >> + goto out_put_prog; > > > >> + } > > > >> + tgt_prog = bpf_prog_get(tgt_prog_fd); > > > >> + if (IS_ERR(tgt_prog)) { > > > >> + err = PTR_ERR(tgt_prog); > > > >> + tgt_prog = NULL; > > > >> + goto out_put_prog; > > > >> + } > > > >> + > > > >> + } else if (btf_id) { > > > >> + err = -EINVAL; > > > >> + goto out_put_prog; > > > >> + } else { > > > >> + btf_id = prog->aux->attach_btf_id; > > > >> + tgt_prog = prog->aux->linked_prog; > > > >> + if (tgt_prog) > > > >> + bpf_prog_inc(tgt_prog); /* we call bpf_prog_put() on link release */ > > > > > > > > so the first prog_load cmd will beholding the first target prog? > > > > This is complete non starter. > > > > You didn't mention such decision anywhere. > > > > The first ext prog will attach to the first dispatcher xdp prog, > > > > then that ext prog will multi attach to second dispatcher xdp prog and > > > > the first dispatcher prog will live in the kernel forever. > > > > > > Huh, yeah, you're right that's no good. Missing that was a think-o on my > > > part, sorry about that :/ > > > > > > > That's not what we discussed back in April. > > > > > > No, you mentioned turning aux->linked_prog into a list. However once I > > > started looking at it I figured it was better to actually have all this > > > (the trampoline and ref) as part of the bpf_link structure, since > > > logically they're related. 
> > > > > > But as you pointed out, the original reference sticks. So either that > > > needs to be removed, or I need to go back to the 'aux->linked_progs as a > > > list' idea. Any preference? > > > > Good question. Back then I was thinking about converting linked_prog into link > > list, since standalone single linked_prog is quite odd, because attaching ext > > prog to multiple tgt progs should have equivalent properties across all > > attachments. > > Back then bpf_link wasn't quite developed. > > Now I feel moving into bpf_tracing_link is better. > > I guess a link list of bpf_tracing_link-s from 'struct bpf_prog' might work. > > At prog load time we can do bpf_link_init() only (without doing bpf_link_prime) > > and keep this pre-populated bpf_link with target bpf prog and trampoline > > in a link list accessed from 'struct bpf_prog'. > > Then bpf_tracing_prog_attach() without extra tgt_prog_fd/btf_id would complete > > that bpf_tracing_link by calling bpf_link_prime() and bpf_link_settle() > > without allocating new one. > > Something like: > > struct bpf_tracing_link { > > struct bpf_link link; /* ext prog pointer is hidding in there */ > > enum bpf_attach_type attach_type; > > struct bpf_trampoline *tr; > > struct bpf_prog *tgt_prog; /* old aux->linked_prog */ > > }; > > > > ext prog -> aux -> link list of above bpf_tracing_link-s > > > > It's a circular reference, obviously. > > Need to think through the complications and locking. > > > > bpf_tracing_prog_attach() with tgt_prog_fd/btf_id will alloc new bpf_tracing_link > > and will add it to a link list. > > > > Just a rough idea. I wonder what Andrii thinks. > > > > I need to spend more time reading existing and new code to see all the > details, but I'll throw a slightly different proposal and let you guys > shoot it down. > > So, what if instead of having linked_prog (as bpf_prog *, refcnt'ed), > at BPF_PROG_LOAD time we just record the target prog's ID. 
BPF > verifier, when doing its target prog checks would attempt to get > bpf_prog * reference; if by that time the target program is gone, > fail, of course. If not, everything proceeds as is, at the end of > verification target_prog is put until attach time. > > Then at attach time, we either go with pre-recorded (in > prog->aux->linked_prog_id) target prog's ID or we get a new one from > RAW_TP_OPEN tgt_prog_fd. Either way, we bump refcnt on that target > prog and keep it with bpf_tracing_link (so link on detach would put > target_prog, that way it doesn't go away while EXT prog is attached). > Then do all the compatibility checks, and if everything works out, > bpf_tracing_link gets created, we record trampoline there, etc, etc. > Basically, instead of having an EXT prog holding a reference to the > target prog, only attachment (bpf_link) does that, which conceptually > also seems to make more sense to me. For verification we store prog ID > and don't hold target prog at all. > > > Now, there will be a problem once you attach EXT prog to a new XDP > root program and release a link against the original XDP root program. > First, I hope I understand the desired sequence right, here's an > example: > > 1. load XDP root prog X > 2. load EXT prog with target prog X > 3. attach EXT prog to prog X > 4. load XDP root prog Y > 5. attach EXT prog to prog Y (Y and X should be "compatible") > 6. detach prog X (close bpf_link) > > Is that the right sequence? > > If yes, then the problem with storing ID of prog X in EXT > prog->aux->linked_prog_id is that you won't be able to re-attach to > new prog Z, because there won't be anything to check compatibility > against (prog X will be long time gone). > > So we can do two things here: > > 1. on attach, replace ext_prog->aux->linked_prog_id with the latest > attached prog (prog Y ID from above example) > 2. instead of recording target program FD/ID, capture BTF FD and/or > enough BTF information for checking compatibility. 
> > Approach 2) seems like conceptually the right thing to do (record type > info we care about, not an **instance** of BPF program, compatible > with that type info), but technically might be harder. I've read your proposal couple times and still don't get what you're trying to solve with either ID or BTF info recording. So that target prog doesn't get refcnt-ed? What's a problem with it? Currently it's being refcnt-d in aux->linked_prog. What I'm proposing about is to convert aux->linked_prog into a link list of bpf_tracing_links which will contain linked_prog inside. Conceptually that's what bpf_link is doing. It links two progs. EXT prog is recorded in 'struct bpf_link' and the target prog is recorded in 'struct bpf_tracing_link'. So from bpf_link perspective everything seems clean to me. The link list of bpf_tracing_link-s in EXT_prog->aux is only to preserve existing api of prog_load cmd. As far as step 5: attach EXT prog to prog Y (Y and X should be "compatible") The chance of failure there should be minimal. libxdp/libdispatcher will prepare rootlet XDP prog. It should really make sure that Y and X are compatible. This should be invisible to users. In addition we still need bpf_link_update_hook() I was talking about in April. The full sequence is: first user process: 1. load XDP root prog X 1' root_link = attach X to eth0 2. load EXT prog with target prog X 3. app1_link_fd = attach EXT prog to prog X second user process: 4. load XDP root prog Y 4'. find EXT prog of the first user process 5. app2_link_fd = attach EXT prog to prog Y (Y and X should be "compatible") 6. bpf_link_update(root_link, X, Y); // now packet flows into Y and into EXT // while EXT is attached in two places 7. app1_link_fd' = FD in second process that points to the same tracing link as app1_link_fd in the first process. 
bpf_link_update_hook(app1_link_fd', app2_link_fd) The last operation needs to update the bpf_tracing_link that is held by app1 (which is the first user process) from the second user process. It needs to retarget (update_hook) inside bpf_tracing_link from X to Y, since the processes are more or less not aware of each other. One firewall holds link_fd that connects EXT to X, but the second firewall (via libxdp) is updating that tracing link to re-hook EXT into Y.
On Mon, Jul 20, 2020 at 4:35 PM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Sun, Jul 19, 2020 at 10:02:48PM -0700, Andrii Nakryiko wrote: > > On Thu, Jul 16, 2020 at 7:06 PM Alexei Starovoitov > > <alexei.starovoitov@gmail.com> wrote: > > > > > > On Thu, Jul 16, 2020 at 12:50:05PM +0200, Toke Høiland-Jørgensen wrote: > > > > Alexei Starovoitov <alexei.starovoitov@gmail.com> writes: > > > > > > > > > On Wed, Jul 15, 2020 at 03:09:02PM +0200, Toke Høiland-Jørgensen wrote: > > > > >> > > > > >> + if (tgt_prog_fd) { > > > > >> + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ > > > > >> + if (prog->type != BPF_PROG_TYPE_EXT || > > > > >> + !btf_id) { > > > > >> + err = -EINVAL; > > > > >> + goto out_put_prog; > > > > >> + } > > > > >> + tgt_prog = bpf_prog_get(tgt_prog_fd); > > > > >> + if (IS_ERR(tgt_prog)) { > > > > >> + err = PTR_ERR(tgt_prog); > > > > >> + tgt_prog = NULL; > > > > >> + goto out_put_prog; > > > > >> + } > > > > >> + > > > > >> + } else if (btf_id) { > > > > >> + err = -EINVAL; > > > > >> + goto out_put_prog; > > > > >> + } else { > > > > >> + btf_id = prog->aux->attach_btf_id; > > > > >> + tgt_prog = prog->aux->linked_prog; > > > > >> + if (tgt_prog) > > > > >> + bpf_prog_inc(tgt_prog); /* we call bpf_prog_put() on link release */ > > > > > > > > > > so the first prog_load cmd will beholding the first target prog? > > > > > This is complete non starter. > > > > > You didn't mention such decision anywhere. > > > > > The first ext prog will attach to the first dispatcher xdp prog, > > > > > then that ext prog will multi attach to second dispatcher xdp prog and > > > > > the first dispatcher prog will live in the kernel forever. > > > > > > > > Huh, yeah, you're right that's no good. Missing that was a think-o on my > > > > part, sorry about that :/ > > > > > > > > > That's not what we discussed back in April. > > > > > > > > No, you mentioned turning aux->linked_prog into a list. 
However once I > > > > started looking at it I figured it was better to actually have all this > > > > (the trampoline and ref) as part of the bpf_link structure, since > > > > logically they're related. > > > > > > > > But as you pointed out, the original reference sticks. So either that > > > > needs to be removed, or I need to go back to the 'aux->linked_progs as a > > > > list' idea. Any preference? > > > > > > Good question. Back then I was thinking about converting linked_prog into link > > > list, since standalone single linked_prog is quite odd, because attaching ext > > > prog to multiple tgt progs should have equivalent properties across all > > > attachments. > > > Back then bpf_link wasn't quite developed. > > > Now I feel moving into bpf_tracing_link is better. > > > I guess a link list of bpf_tracing_link-s from 'struct bpf_prog' might work. > > > At prog load time we can do bpf_link_init() only (without doing bpf_link_prime) > > > and keep this pre-populated bpf_link with target bpf prog and trampoline > > > in a link list accessed from 'struct bpf_prog'. > > > Then bpf_tracing_prog_attach() without extra tgt_prog_fd/btf_id would complete > > > that bpf_tracing_link by calling bpf_link_prime() and bpf_link_settle() > > > without allocating new one. > > > Something like: > > > struct bpf_tracing_link { > > > struct bpf_link link; /* ext prog pointer is hidding in there */ > > > enum bpf_attach_type attach_type; > > > struct bpf_trampoline *tr; > > > struct bpf_prog *tgt_prog; /* old aux->linked_prog */ > > > }; > > > > > > ext prog -> aux -> link list of above bpf_tracing_link-s > > > > > > It's a circular reference, obviously. > > > Need to think through the complications and locking. > > > > > > bpf_tracing_prog_attach() with tgt_prog_fd/btf_id will alloc new bpf_tracing_link > > > and will add it to a link list. > > > > > > Just a rough idea. I wonder what Andrii thinks. 
> > > > > > > I need to spend more time reading existing and new code to see all the > > details, but I'll throw a slightly different proposal and let you guys > > shoot it down. > > > > So, what if instead of having linked_prog (as bpf_prog *, refcnt'ed), > > at BPF_PROG_LOAD time we just record the target prog's ID. BPF > > verifier, when doing its target prog checks would attempt to get > > bpf_prog * reference; if by that time the target program is gone, > > fail, of course. If not, everything proceeds as is, at the end of > > verification target_prog is put until attach time. > > > > Then at attach time, we either go with pre-recorded (in > > prog->aux->linked_prog_id) target prog's ID or we get a new one from > > RAW_TP_OPEN tgt_prog_fd. Either way, we bump refcnt on that target > > prog and keep it with bpf_tracing_link (so link on detach would put > > target_prog, that way it doesn't go away while EXT prog is attached). > > Then do all the compatibility checks, and if everything works out, > > bpf_tracing_link gets created, we record trampoline there, etc, etc. > > Basically, instead of having an EXT prog holding a reference to the > > target prog, only attachment (bpf_link) does that, which conceptually > > also seems to make more sense to me. For verification we store prog ID > > and don't hold target prog at all. > > > > > > Now, there will be a problem once you attach EXT prog to a new XDP > > root program and release a link against the original XDP root program. > > First, I hope I understand the desired sequence right, here's an > > example: > > > > 1. load XDP root prog X > > 2. load EXT prog with target prog X > > 3. attach EXT prog to prog X > > 4. load XDP root prog Y > > 5. attach EXT prog to prog Y (Y and X should be "compatible") > > 6. detach prog X (close bpf_link) > > > > Is that the right sequence? 
> > > > If yes, then the problem with storing ID of prog X in EXT > > prog->aux->linked_prog_id is that you won't be able to re-attach to > > new prog Z, because there won't be anything to check compatibility > > against (prog X will be long time gone). > > > > So we can do two things here: > > > > 1. on attach, replace ext_prog->aux->linked_prog_id with the latest > > attached prog (prog Y ID from above example) > > 2. instead of recording target program FD/ID, capture BTF FD and/or > > enough BTF information for checking compatibility. > > > > Approach 2) seems like conceptually the right thing to do (record type > > info we care about, not an **instance** of BPF program, compatible > > with that type info), but technically might be harder. > > I've read your proposal couple times and still don't get what you're > trying to solve with either ID or BTF info recording. > So that target prog doesn't get refcnt-ed? What's a problem with it? > Currently it's being refcnt-d in aux->linked_prog. > What I'm proposing about is to convert aux->linked_prog into a link list > of bpf_tracing_links which will contain linked_prog inside. > Conceptually that's what bpf_link is doing. It links two progs. > EXT prog is recorded in 'struct bpf_link' and > the target prog is recorded in 'struct bpf_tracing_link'. > So from bpf_link perspective everything seems clean to me. > The link list of bpf_tracing_link-s in EXT_prog->aux is only to preserve > existing api of prog_load cmd. Right, I wanted to avoid taking a refcnt on aux->linked_prog during PROG_LOAD. The reason for that was (and still is) that I don't get who and when has to bpf_prog_put() original aux->linked_prog to allow the prog X to be freed. I.e., after you re-attach to prog Y, how prog X is released (assuming no active bpf_link is keeping it from being freed)? That's my biggest confusion right now. 
I also didn't like the idea of half-creating bpf_tracing_link on PROG_LOAD and then turning it into a real link with bpf_link_settle on attach. That sounded like a hack to me. But now I'm also confused why we need to turn aux->linked_prog into a list. Seems like we need it only for old-style attach that doesn't specify tgt_prog_fd, no? Only in that case we'll use aux->linked_prog. Otherwise we know the target prog from tgt_prog_fd. So I'll be honest that I don't get the whole idea of maintaining a list of bpf_tracing_links. It seems like it should be possible to make bpf_tracing_link decoupled from any prog's aux and have their own independent lifetime. > > As far as step 5: attach EXT prog to prog Y (Y and X should be "compatible") > The chance of failure there should be minimal. libxdp/libdispatcher will > prepare rootlet XDP prog. It should really make sure that Y and X are compatible. > This should be invisible to users. Right, of course, but the kernel needs to validate that anyways, which is why I pointed that out. Or are you saying we should just assume that they are valid? > > In addition we still need bpf_link_update_hook() I was talking about in April. > The full sequence is: > first user process: > 1. load XDP root prog X > 1' root_link = attach X to eth0 > 2. load EXT prog with target prog X > 3. app1_link_fd = attach EXT prog to prog X > second user process: > 4. load XDP root prog Y > 4'. find EXT prog of the first user process > 5. app2_link_fd = attach EXT prog to prog Y (Y and X should be "compatible") > 6. bpf_link_update(root_link, X, Y); // now packet flows into Y and into EXT > // while EXT is attached in two places > 7. app1_link_fd' = FD in second process that points to the same tracing link > as app1_link_fd in the first process. > bpf_link_update_hook(app1_link_fd', app2_link_fd) > the last operation need to update bpf_tracing_link that is held by app1 > (which is the first user process) from the second user process. 
It needs to > retarget (update_hook) inside bpf_tracing_link from X to Y. > Since the processes are more or less not aware of each other. > One firewall holds link_fd that connects EXT to X, > but the second firewall (via libxdp) is updaing that tracing link > to re-hook EXT into Y. Yeah, should be doable given that bpf_trampoline is independently refcounted.
On Mon, Jul 20, 2020 at 08:48:04PM -0700, Andrii Nakryiko wrote: > > Right, I wanted to avoid taking a refcnt on aux->linked_prog during > PROG_LOAD. The reason for that was (and still is) that I don't get who > and when has to bpf_prog_put() original aux->linked_prog to allow the > prog X to be freed. I.e., after you re-attach to prog Y, how prog X is > released (assuming no active bpf_link is keeping it from being freed)? > That's my biggest confusion right now. > > I also didn't like the idea of half-creating bpf_tracing_link on > PROG_LOAD and then turning it into a real link with bpf_link_settle on > attach. That sounded like a hack to me. The link is kinda already created during prog_load of EXT type. Typically prog_load needs expected_attach_type that points to something that is not going to disappear. In case of EXT progs the situation is different, since the target can be unloaded. So the prog load cmd not only validates the program extension but links target and ext prog together at the same time. The target prog will be held until EXT prog is unloaded. I think it's important to preserve this semantics to the users that the target prog is frozen at load time and no races are going to happen later. Otherwise it leads to double validation at attach time and races. What raw_tp_open is doing right now is a hack. It allocates bpf_tracing_link, registers it into link_idr and activates trampoline, but in reality that link is already there. I think we can clean it up by creating bpf_tracing_link at prog load time. Whether to register it at that time into link_idr is up to discussion. (I think probably not). Then raw_tp_open will activate that allocated bpf_tracing_link via trampoline, _remove_ it from aux->linked_tracing_link (old linked_prog) and return FD to the user. So this partially created link at load_time will become complete link and close of the link will detach EXT from the target and the target can be unloaded. 
(Currently the target cannot be unloaded until EXT is unloaded which is not great). The EXT_prog->aux->linked_tracing_link (old linked_prog) will exist only during the time between prog_load and raw_tp_open without args. I think that would be a good clean up. Then multi attach of EXT progs is clean too. New raw_tp_open with tgt_prog_fd/tgt_btf_id will validate EXT against the new target, link them via new bpf_tracing_link, activate it via trampoline and return FD. No link list anywhere. Note that this second validation of EXT against new target is lightweight compared to the load. The first load goes through all EXT instructions with verifier ctx of the target prog. The second validation needs to compare BTF proto tgt_prog_fd+tgt_btf_id with EXT's btf_id only (and check tgt_prog_fd->type/expected_attach_type). Since EXT was loaded earlier it has valid insns. So if you're thinking "cannot we validate insns at load time, but then remember tgt stuff instead of creating a partial link, and double validate BTF at raw_tp_open when it's called without tgt_prog_fd?" The answer is "yes, we can", but double validation of BTF I think is just a waste of cycles, when tgt prog could have been held a bit between load and attach. And it's race free. Whereas if we remember target prog_id at load then raw_tp_open is shooting in the dark. Unlikely, but that prog_id could have been reused.
On Tue, Jul 21, 2020 at 5:29 PM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Mon, Jul 20, 2020 at 08:48:04PM -0700, Andrii Nakryiko wrote: > > > > Right, I wanted to avoid taking a refcnt on aux->linked_prog during > > PROG_LOAD. The reason for that was (and still is) that I don't get who > > and when has to bpf_prog_put() original aux->linked_prog to allow the > > prog X to be freed. I.e., after you re-attach to prog Y, how prog X is > > released (assuming no active bpf_link is keeping it from being freed)? > > That's my biggest confusion right now. > > > > I also didn't like the idea of half-creating bpf_tracing_link on > > PROG_LOAD and then turning it into a real link with bpf_link_settle on > > attach. That sounded like a hack to me. > > The link is kinda already created during prog_load of EXT type. > Typically prog_load needs expected_attach_type that points to something > that is not going to disappear. In case of EXT progs the situation is different, > since the target can be unloaded. So the prog load cmd not only validates the > program extension but links target and ext prog together at the same time. > The target prog will be held until EXT prog is unloaded. > I think it's important to preserve this semantics to the users that the target prog > is frozen at load time and no races are going to happen later. > Otherwise it leads to double validation at attach time and races. Yes, I was confused because of the step you describe below (removal of linked_prog from aux->linked_prog and moving it into BPF link on attach). With that move, it makes sense to have that bpf_prog refcnt bump on load, makes everything simpler. > > What raw_tp_open is doing right now is a hack. It allocates bpf_tracing_link, > registers it into link_idr and activates trampoline, but in reality that link is already there. That's an interesting way to look at this. For me it always felt normal, because real linking is happening inside bpf_trampoline_link_prog(). 
But it's a minor technicality, it's not important enough to discuss. > I think we can clean it up by creating bpf_tracing_link at prog load time. > Whether to register it at that time into link_idr is up to discussion. > (I think probably not). yeah, I agree, let's not > Then raw_tp_open will activate that allocated bpf_tracing_link via trampoline, > _remove_ it from aux->linked_tracing_link (old linked_prog) and > return FD to the user. Ok, so this move from aux->linked_prog into BPF link itself is what was missing, I wasn't sure whether you proposed doing that. With that it makes more sense, even if it's a bit "asymmetrical" in that you can attach only once using old-style EXT attach, but can attach and re-attach many times if you specify tgt_prog_fd. But I think it's also fine, I just wish we always required tgt_prog_fd... > So this partially created link at load_time will become complete link and > close of the link will detach EXT from the target and the target can be unloaded. > (Currently the target cannot be unloaded until EXT is loaded which is not great). > The EXT_prog->aux->linked_tracing_link (old linked_prog) will exist only during > the time between prog_load and raw_tp_open without args. > I think that would be a good clean up. yep, I agree > Then multi attach of EXT progs is clean too. > New raw_tp_open with tgt_prog_fd/tgt_btf_id will validate EXT against the new target, > link them via new bpf_tracing_link, activate it via trampoline and return FD. > No link list anywhere. > Note that this second validation of EXT against new target is light weight comparing > to the load. The first load goes through all EXT instructions with verifier ctx of > the target prog. The second validation needs to compare BTF proto tgr_prog_fd+tgt_btf_id > with EXT's btf_id only (and check tgt_prog_fd->type/expected_attach_type). > Since EXT was loaded earlier it has valid insns. Right, this matches what I understood about this re-attach logic, great. 
> So if you're thinking "cannot we validate insns at load time, but then remember > tgt stuff instead of creating a partial link, and double validate BTF at raw_tp_open > when it's called without tgt_prog_fd?" > The answer is "yes, we can", but double validation of BTF I think is just a waste of cycles, > when tgt prog could have been held a bit between load and attach. > And it's race free. Whereas if we remember target prog_id at load then raw_tp_open is > shooting in the dark. Unlikely, but that prog_id could have been reused. Sure, I agree that there is no need to complicate everything with ID (now that I understand the proposal better). My confusion came from two things: 1. Current API usage would allow PROG_LOAD of EXT program, would take refcnt on target program. RAW_TP_OPEN + close link to detach. Then, if necessary again RAW_TP_OPEN, and the second (and subsequent times) would succeed. But it seems like we are changing that to only allow one RAW_TP_OPEN if one doesn't provide tgt_prog_fd. I think it's acceptable, but it wasn't clear to me. 2. You were talking about turning aux->linked_prog into a linked list of bpf_tracing_links, but I couldn't see the point. In your latest version you didn't talk about this list of links, so it seems like that's not necessary after all, right? I like that. So I think we are in agreement overall. Just one technical moment, let me double-check my understanding again. You seem to be favoring pre-creating bpf_tracing_link because there is both tgt_prog (that we refcnt on EXT prog load) and we also lookup and initialize trampoline in check_attach_btf_id(). Of course there is also expected_attach_type, but that's a trivial known enum, so I'm ignoring it. So because we have those two entities which on attach are supposed to be owned by bpf_tracing_link, you just want to pre-create a "shell" of bpf_tracing_link, and then on attach complete its initialization, is that right? 
That certainly simplifies attach logic a bit and I think it's fine. But also it seems like we'll be creating and initializing a **different** trampoline on re-attach to prog Y. Now attach will do different things depending on whether tgt_prog_fd is provided or not. So I wonder why not just unify this trampoline initialization and do it at attach time? For all valid EXT use cases today the result is the same: everything still works the same. For cases where we for some reason can't initialize bpf_trampoline, that failure will happen at attach time, not on a load time. But that seems fine, because that's going to be the case for re-attach (with tgt_prog_fd) anyways. Looking through the verifier code, it doesn't seem like it does anything much with prog->aux->trampoline, unless I missed something, so it must be ok to do it after load? It also seems to avoid this double BTF validation concern you have, no? Thoughts? Regardless, thanks for elaborating, I think I get it end-to-end now.
On Tue, Jul 21, 2020 at 11:02:04PM -0700, Andrii Nakryiko wrote: > > Just one technical moment, let me double-check my understanding again. > You seem to be favoring pre-creating bpf_tracing_link because there is > both tgt_prog (that we refcnt on EXT prog load) and we also lookup and > initialize trampoline in check_attach_btf_id(). Of course there is > also expected_attach_type, but that's a trivial known enum, so I'm > ignoring it. So because we have those two entities which on attach are > supposed to be owned by bpf_tracing_link, you just want to pre-create > a "shell" of bpf_tracing_link, and then on attach complete its > initialization, is that right? That certainly simplifies attach logic > a bit and I think it's fine. Right. It just feels cleaner to group objects for the same purpose. > But also it seems like we'll be creating and initializing a > **different** trampoline on re-attach to prog Y. Now attach will do > different things depending on whether tgt_prog_fd is provided or not. Right, but it can be a common helper instead that is creating a 'shell' of bpf_tracing_link. Calling it from prog_load and from raw_tp_open is imo clean enough. No copy paste of code. If that was the concern. > So I wonder why not just unify this trampoline initialization and do > it at attach time? For all valid EXT use cases today the result is the > same: everything still works the same. For cases where we for some > reason can't initialize bpf_trampoline, that failure will happen at > attach time, not on a load time. But that seems fine, because that's > going to be the case for re-attach (with tgt_prog_fd) anyways. Looking > through the verifier code, it doesn't seem like it does anything much > with prog->aux->trampoline, unless I missed something, so it must be > ok to do it after load? It also seems to avoid this double BTF > validation concern you have, no? Thoughts? bpf_trampoline_link_prog() is attach time call. 
but bpf_trampoline_lookup() is one-to-one with the target. When load_prog holds the target, it's the right time to prep all things about the target. Notice that the key into trampoline_lookup() is key = ((u64)aux->id) << 32 | btf_id; of the target prog. Can it be done at raw_tp_open time? I guess so, but it feels kinda weird to me to split the target preparation job into several places.
On Wed, Jul 22, 2020 at 5:32 PM Alexei Starovoitov <alexei.starovoitov@gmail.com> wrote: > > On Tue, Jul 21, 2020 at 11:02:04PM -0700, Andrii Nakryiko wrote: > > > > Just one technical moment, let me double-check my understanding again. > > You seem to be favoring pre-creating bpf_tracing_link because there is > > both tgt_prog (that we refcnt on EXT prog load) and we also lookup and > > initialize trampoline in check_attach_btf_id(). Of course there is > > also expected_attach_type, but that's a trivial known enum, so I'm > > ignoring it. So because we have those two entities which on attach are > > supposed to be owned by bpf_tracing_link, you just want to pre-create > > a "shell" of bpf_tracing_link, and then on attach complete its > > initialization, is that right? That certainly simplifies attach logic > > a bit and I think it's fine. > > Right. It just feels cleaner to group objects for the same purpose. > > > But also it seems like we'll be creating and initializing a > > **different** trampoline on re-attach to prog Y. Now attach will do > > different things depending on whether tgt_prog_fd is provided or not. > > Right, but it can be a common helper instead that is creating a 'shell' > of bpf_tracing_link. > Calling it from prog_load and from raw_tp_open is imo clean enough. > No copy paste of code. > If that was the concern. > > > So I wonder why not just unify this trampoline initialization and do > > it at attach time? For all valid EXT use cases today the result is the > > same: everything still works the same. For cases where we for some > > reason can't initialize bpf_trampoline, that failure will happen at > > attach time, not on a load time. But that seems fine, because that's > > going to be the case for re-attach (with tgt_prog_fd) anyways. Looking > > through the verifier code, it doesn't seem like it does anything much > > with prog->aux->trampoline, unless I missed something, so it must be > > ok to do it after load? 
It also seems to avoid this double BTF > > validation concern you have, no? Thoughts? > > bpf_trampoline_link_prog() is attach time call. > but bpf_trampoline_lookup() is one to one with the target. > When load_prog holds the target it's a right time to prep all things > about the target. Notice that key into trampoline_lookup() is > key = ((u64)aux->id) << 32 | btf_id; > of the target prog. > Can it be done at raw_tp_open time? > I guess so, but feels kinda weird to me to split the target preparation > job into several places. ok, sounds good to me
diff --git a/include/linux/bpf.h b/include/linux/bpf.h index ca3a2a1812c2..ca7a710cdb57 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -568,8 +568,8 @@ static __always_inline unsigned int bpf_dispatcher_nop_func( } #ifdef CONFIG_BPF_JIT struct bpf_trampoline *bpf_trampoline_lookup(u64 key); -int bpf_trampoline_link_prog(struct bpf_prog *prog); -int bpf_trampoline_unlink_prog(struct bpf_prog *prog); +int bpf_trampoline_link_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); +int bpf_trampoline_unlink_prog(struct bpf_prog *prog, struct bpf_trampoline *tr); int bpf_trampoline_get(u64 key, void *addr, struct btf_func_model *fmodel, struct bpf_trampoline **trampoline); @@ -621,11 +621,13 @@ static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key) { return NULL; } -static inline int bpf_trampoline_link_prog(struct bpf_prog *prog) +static inline int bpf_trampoline_link_prog(struct bpf_prog *prog, + struct bpf_trampoline *tr) { return -ENOTSUPP; } -static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog) +static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog, + struct bpf_trampoline *tr) { return -ENOTSUPP; } @@ -697,10 +699,12 @@ struct bpf_prog_aux { bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */ bool func_proto_unreliable; enum bpf_tramp_prog_type trampoline_prog_type; - struct bpf_trampoline *trampoline; struct hlist_node tramp_hlist; /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */ const struct btf_type *attach_func_proto; + /* target BPF prog types for trace programs */ + enum bpf_prog_type tgt_prog_type; + enum bpf_attach_type tgt_attach_type; /* function name for valid attach_btf_id */ const char *attach_func_name; struct bpf_prog **func; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 5e386389913a..01a0814a8cfe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -574,8 +574,10 @@ union bpf_attr { } query; struct { /* anonymous struct used by 
BPF_RAW_TRACEPOINT_OPEN command */ - __u64 name; - __u32 prog_fd; + __u64 name; + __u32 prog_fd; + __u32 tgt_prog_fd; + __u32 tgt_btf_id; } raw_tracepoint; struct { /* anonymous struct for BPF_BTF_LOAD */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 9df4cc9a2907..ed4d7259316a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2087,7 +2087,6 @@ static void bpf_prog_free_deferred(struct work_struct *work) if (aux->prog->has_callchain_buf) put_callchain_buffers(); #endif - bpf_trampoline_put(aux->trampoline); for (i = 0; i < aux->func_cnt; i++) bpf_jit_free(aux->func[i]); if (aux->func_cnt) { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7ea9dfbebd8c..2301d7485b4d 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <linux/bpf_trace.h> #include <linux/bpf_lirc.h> +#include <linux/bpf_verifier.h> #include <linux/btf.h> #include <linux/syscalls.h> #include <linux/slab.h> @@ -2484,11 +2485,21 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) struct bpf_tracing_link { struct bpf_link link; enum bpf_attach_type attach_type; + struct bpf_trampoline *trampoline; + struct bpf_prog *tgt_prog; }; static void bpf_tracing_link_release(struct bpf_link *link) { - WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog)); + struct bpf_tracing_link *tr_link = + container_of(link, struct bpf_tracing_link, link); + + WARN_ON_ONCE(bpf_trampoline_unlink_prog(link->prog, + tr_link->trampoline)); + + bpf_trampoline_put(tr_link->trampoline); + if (tr_link->tgt_prog) + bpf_prog_put(tr_link->tgt_prog); } static void bpf_tracing_link_dealloc(struct bpf_link *link) @@ -2528,10 +2539,17 @@ static const struct bpf_link_ops bpf_tracing_link_lops = { .fill_link_info = bpf_tracing_link_fill_link_info, }; -static int bpf_tracing_prog_attach(struct bpf_prog *prog) +static int bpf_tracing_prog_attach(struct bpf_prog *prog, + int tgt_prog_fd, + u32 btf_id) { struct bpf_link_primer link_primer; + struct bpf_trampoline 
*tr = NULL; + struct bpf_prog *tgt_prog = NULL; struct bpf_tracing_link *link; + struct btf_func_model fmodel; + long addr; + u64 key; int err; switch (prog->type) { @@ -2560,6 +2578,43 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) goto out_put_prog; } + if (tgt_prog_fd) { + /* For now we only allow new targets for BPF_PROG_TYPE_EXT */ + if (prog->type != BPF_PROG_TYPE_EXT || + !btf_id) { + err = -EINVAL; + goto out_put_prog; + } + tgt_prog = bpf_prog_get(tgt_prog_fd); + if (IS_ERR(tgt_prog)) { + err = PTR_ERR(tgt_prog); + tgt_prog = NULL; + goto out_put_prog; + } + + } else if (btf_id) { + err = -EINVAL; + goto out_put_prog; + } else { + btf_id = prog->aux->attach_btf_id; + tgt_prog = prog->aux->linked_prog; + if (tgt_prog) + bpf_prog_inc(tgt_prog); /* we call bpf_prog_put() on link release */ + } + err = bpf_check_attach_target(NULL, prog, tgt_prog, btf_id, + &fmodel, &addr, NULL, NULL); + if (err) + goto out_put_prog; + + if (tgt_prog) + key = ((u64)tgt_prog->aux->id) << 32 | btf_id; + else + key = btf_id; + + err = bpf_trampoline_get(key, (void *)addr, &fmodel, &tr); + if (err) + goto out_put_prog; + link = kzalloc(sizeof(*link), GFP_USER); if (!link) { err = -ENOMEM; @@ -2575,15 +2630,21 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) goto out_put_prog; } - err = bpf_trampoline_link_prog(prog); + err = bpf_trampoline_link_prog(prog, tr); if (err) { bpf_link_cleanup(&link_primer); goto out_put_prog; } + link->trampoline = tr; + link->tgt_prog = tgt_prog; return bpf_link_settle(&link_primer); out_put_prog: bpf_prog_put(prog); + if (tgt_prog) + bpf_prog_put(tgt_prog); + if (tr) + bpf_trampoline_put(tr); return err; } @@ -2661,7 +2722,7 @@ static const struct bpf_link_ops bpf_raw_tp_link_lops = { .fill_link_info = bpf_raw_tp_link_fill_link_info, }; -#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd +#define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.tgt_btf_id static int bpf_raw_tracepoint_open(const union 
bpf_attr *attr) { @@ -2685,8 +2746,9 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) case BPF_PROG_TYPE_EXT: case BPF_PROG_TYPE_LSM: if (attr->raw_tracepoint.name) { - /* The attach point for this category of programs - * should be specified via btf_id during program load. + /* The attach point for this category of programs should + * be specified via btf_id during program load, or using + * tgt_btf_id. */ err = -EINVAL; goto out_put_prog; @@ -2696,7 +2758,9 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) tp_name = prog->aux->attach_func_name; break; } - return bpf_tracing_prog_attach(prog); + return bpf_tracing_prog_attach(prog, + attr->raw_tracepoint.tgt_prog_fd, + attr->raw_tracepoint.tgt_btf_id); case BPF_PROG_TYPE_RAW_TRACEPOINT: case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: if (strncpy_from_user(buf, diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index fadfa330f728..40797405f1a0 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -256,14 +256,13 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) } } -int bpf_trampoline_link_prog(struct bpf_prog *prog) +int bpf_trampoline_link_prog(struct bpf_prog *prog, + struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; - struct bpf_trampoline *tr; int err = 0; int cnt; - tr = prog->aux->trampoline; kind = bpf_attach_type_to_tramp(prog); mutex_lock(&tr->mutex); if (tr->extension_prog) { @@ -296,7 +295,7 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog) } hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]); tr->progs_cnt[kind]++; - err = bpf_trampoline_update(prog->aux->trampoline); + err = bpf_trampoline_update(tr); if (err) { hlist_del(&prog->aux->tramp_hlist); tr->progs_cnt[kind]--; @@ -307,13 +306,12 @@ int bpf_trampoline_link_prog(struct bpf_prog *prog) } /* bpf_trampoline_unlink_prog() should never fail. 
*/ -int bpf_trampoline_unlink_prog(struct bpf_prog *prog) +int bpf_trampoline_unlink_prog(struct bpf_prog *prog, + struct bpf_trampoline *tr) { enum bpf_tramp_prog_type kind; - struct bpf_trampoline *tr; int err; - tr = prog->aux->trampoline; kind = bpf_attach_type_to_tramp(prog); mutex_lock(&tr->mutex); if (kind == BPF_TRAMP_REPLACE) { @@ -325,7 +323,7 @@ int bpf_trampoline_unlink_prog(struct bpf_prog *prog) } hlist_del(&prog->aux->tramp_hlist); tr->progs_cnt[kind]--; - err = bpf_trampoline_update(prog->aux->trampoline); + err = bpf_trampoline_update(tr); out: mutex_unlock(&tr->mutex); return err; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a1ab7298f53b..1222031fc2e9 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -10896,6 +10896,12 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, if (!btf_type_is_func_proto(t)) return -EINVAL; + if ((prog->aux->tgt_prog_type && + prog->aux->tgt_prog_type != tgt_prog->type) || + (prog->aux->tgt_attach_type && + prog->aux->tgt_attach_type != tgt_prog->expected_attach_type)) + return -EINVAL; + if (tgt_prog && conservative) t = NULL; @@ -10938,7 +10944,6 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) const char *tname; long addr; int ret; - u64 key; if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) return check_struct_ops_btf_id(env); @@ -10954,13 +10959,13 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) return ret; if (tgt_prog) { + prog->aux->tgt_prog_type = tgt_prog->type; + prog->aux->tgt_attach_type = tgt_prog->expected_attach_type; + if (prog->type == BPF_PROG_TYPE_EXT) { env->ops = bpf_verifier_ops[tgt_prog->type]; prog->expected_attach_type = tgt_prog->expected_attach_type; } - key = ((u64)tgt_prog->aux->id) << 32 | btf_id; - } else { - key = btf_id; } prog->aux->attach_func_proto = t; @@ -10991,8 +10996,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) if (ret < 0) return ret; } - return bpf_trampoline_get(key, (void *)addr, 
&fmodel, - &prog->aux->trampoline); + return 0; } }