Merge branch 'writeable-bpf-tracepoints'
Matt Mullins says:
====================
This adds an opt-in interface for tracepoints to expose a writable context to
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that are attached, while
supporting read-only access from existing BPF_PROG_TYPE_RAW_TRACEPOINT
programs, as well as from non-BPF-based tracepoints.
The initial motivation is to support tracing that can be observed from the
remote end of an NBD socket, e.g. by adding flags to the struct nbd_request
header. Earlier attempts added an NBD-specific tracepoint fd, but during code
review it was recommended that I implement this more generically -- as a
result, this patchset is far simpler than my initial try.
v4->v5:
* rebased onto bpf-next/master and fixed merge conflicts
* "tools: sync bpf.h" also syncs comments that have previously changed
in bpf-next
v3->v4:
* fixed a silly copy/paste typo in include/trace/events/bpf_test_run.h
(_TRACE_NBD_H -> _TRACE_BPF_TEST_RUN_H)
* fixed incorrect/misleading wording in patch 1's commit message,
since the pointer cannot be directly dereferenced in a
BPF_PROG_TYPE_RAW_TRACEPOINT
* cleaned up the error message wording if the prog_tests fail
* Addressed feedback from Yonghong
* reject non-pointer-sized accesses to the buffer pointer
* use sizeof(struct nbd_request) as one-byte-past-the-end in
raw_tp_writable_reject_nbd_invalid.c
* use BPF_MOV64_IMM instead of BPF_LD_IMM64
v2->v3:
* Andrew addressed Josef's comments:
* C-style commenting in nbd.c
* Collapsed identical events into a single DECLARE_EVENT_CLASS.
This saves about 2kB of kernel text
v1->v2:
* add selftests
* sync tools/include/uapi/linux/bpf.h
* reject variable offset into the buffer
* add string representation of PTR_TO_TP_BUFFER to reg_type_str
====================
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
This commit is contained in:
@@ -10741,6 +10741,7 @@ L: linux-block@vger.kernel.org
|
|||||||
L: nbd@other.debian.org
|
L: nbd@other.debian.org
|
||||||
F: Documentation/blockdev/nbd.txt
|
F: Documentation/blockdev/nbd.txt
|
||||||
F: drivers/block/nbd.c
|
F: drivers/block/nbd.c
|
||||||
|
F: include/trace/events/nbd.h
|
||||||
F: include/uapi/linux/nbd.h
|
F: include/uapi/linux/nbd.h
|
||||||
|
|
||||||
NETWORK DROP MONITOR
|
NETWORK DROP MONITOR
|
||||||
|
|||||||
@@ -44,6 +44,9 @@
|
|||||||
#include <linux/nbd-netlink.h>
|
#include <linux/nbd-netlink.h>
|
||||||
#include <net/genetlink.h>
|
#include <net/genetlink.h>
|
||||||
|
|
||||||
|
#define CREATE_TRACE_POINTS
|
||||||
|
#include <trace/events/nbd.h>
|
||||||
|
|
||||||
static DEFINE_IDR(nbd_index_idr);
|
static DEFINE_IDR(nbd_index_idr);
|
||||||
static DEFINE_MUTEX(nbd_index_mutex);
|
static DEFINE_MUTEX(nbd_index_mutex);
|
||||||
static int nbd_total_devices = 0;
|
static int nbd_total_devices = 0;
|
||||||
@@ -510,6 +513,10 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
|
|||||||
if (sent) {
|
if (sent) {
|
||||||
if (sent >= sizeof(request)) {
|
if (sent >= sizeof(request)) {
|
||||||
skip = sent - sizeof(request);
|
skip = sent - sizeof(request);
|
||||||
|
|
||||||
|
/* initialize handle for tracing purposes */
|
||||||
|
handle = nbd_cmd_handle(cmd);
|
||||||
|
|
||||||
goto send_pages;
|
goto send_pages;
|
||||||
}
|
}
|
||||||
iov_iter_advance(&from, sent);
|
iov_iter_advance(&from, sent);
|
||||||
@@ -526,11 +533,14 @@ static int nbd_send_cmd(struct nbd_device *nbd, struct nbd_cmd *cmd, int index)
|
|||||||
handle = nbd_cmd_handle(cmd);
|
handle = nbd_cmd_handle(cmd);
|
||||||
memcpy(request.handle, &handle, sizeof(handle));
|
memcpy(request.handle, &handle, sizeof(handle));
|
||||||
|
|
||||||
|
trace_nbd_send_request(&request, nbd->index, blk_mq_rq_from_pdu(cmd));
|
||||||
|
|
||||||
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
|
dev_dbg(nbd_to_dev(nbd), "request %p: sending control (%s@%llu,%uB)\n",
|
||||||
req, nbdcmd_to_ascii(type),
|
req, nbdcmd_to_ascii(type),
|
||||||
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
|
(unsigned long long)blk_rq_pos(req) << 9, blk_rq_bytes(req));
|
||||||
result = sock_xmit(nbd, index, 1, &from,
|
result = sock_xmit(nbd, index, 1, &from,
|
||||||
(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
|
(type == NBD_CMD_WRITE) ? MSG_MORE : 0, &sent);
|
||||||
|
trace_nbd_header_sent(req, handle);
|
||||||
if (result <= 0) {
|
if (result <= 0) {
|
||||||
if (was_interrupted(result)) {
|
if (was_interrupted(result)) {
|
||||||
/* If we haven't sent anything we can just return BUSY,
|
/* If we haven't sent anything we can just return BUSY,
|
||||||
@@ -603,6 +613,7 @@ send_pages:
|
|||||||
bio = next;
|
bio = next;
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
|
trace_nbd_payload_sent(req, handle);
|
||||||
nsock->pending = NULL;
|
nsock->pending = NULL;
|
||||||
nsock->sent = 0;
|
nsock->sent = 0;
|
||||||
return 0;
|
return 0;
|
||||||
@@ -650,6 +661,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
|
|||||||
tag, req);
|
tag, req);
|
||||||
return ERR_PTR(-ENOENT);
|
return ERR_PTR(-ENOENT);
|
||||||
}
|
}
|
||||||
|
trace_nbd_header_received(req, handle);
|
||||||
cmd = blk_mq_rq_to_pdu(req);
|
cmd = blk_mq_rq_to_pdu(req);
|
||||||
|
|
||||||
mutex_lock(&cmd->lock);
|
mutex_lock(&cmd->lock);
|
||||||
@@ -703,6 +715,7 @@ static struct nbd_cmd *nbd_read_stat(struct nbd_device *nbd, int index)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
out:
|
out:
|
||||||
|
trace_nbd_payload_received(req, handle);
|
||||||
mutex_unlock(&cmd->lock);
|
mutex_unlock(&cmd->lock);
|
||||||
return ret ? ERR_PTR(ret) : cmd;
|
return ret ? ERR_PTR(ret) : cmd;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -272,6 +272,7 @@ enum bpf_reg_type {
|
|||||||
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
|
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
|
||||||
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
|
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
|
||||||
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
|
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
|
||||||
|
PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
|
||||||
};
|
};
|
||||||
|
|
||||||
/* The information passed from prog-specific *_is_valid_access
|
/* The information passed from prog-specific *_is_valid_access
|
||||||
@@ -361,6 +362,7 @@ struct bpf_prog_aux {
|
|||||||
u32 used_map_cnt;
|
u32 used_map_cnt;
|
||||||
u32 max_ctx_offset;
|
u32 max_ctx_offset;
|
||||||
u32 max_pkt_offset;
|
u32 max_pkt_offset;
|
||||||
|
u32 max_tp_access;
|
||||||
u32 stack_depth;
|
u32 stack_depth;
|
||||||
u32 id;
|
u32 id;
|
||||||
u32 func_cnt; /* used by non-func prog as the number of func progs */
|
u32 func_cnt; /* used by non-func prog as the number of func progs */
|
||||||
|
|||||||
@@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
|
|||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
|
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
|
||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
|
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
|
||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
|
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
|
||||||
|
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_CGROUP_BPF
|
#ifdef CONFIG_CGROUP_BPF
|
||||||
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
|
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
|
||||||
|
|||||||
@@ -45,6 +45,7 @@ struct bpf_raw_event_map {
|
|||||||
struct tracepoint *tp;
|
struct tracepoint *tp;
|
||||||
void *bpf_func;
|
void *bpf_func;
|
||||||
u32 num_args;
|
u32 num_args;
|
||||||
|
u32 writable_size;
|
||||||
} __aligned(32);
|
} __aligned(32);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \
|
|||||||
* to make sure that if the tracepoint handling changes, the
|
* to make sure that if the tracepoint handling changes, the
|
||||||
* bpf probe will fail to compile unless it too is updated.
|
* bpf probe will fail to compile unless it too is updated.
|
||||||
*/
|
*/
|
||||||
#undef DEFINE_EVENT
|
#define __DEFINE_EVENT(template, call, proto, args, size) \
|
||||||
#define DEFINE_EVENT(template, call, proto, args) \
|
|
||||||
static inline void bpf_test_probe_##call(void) \
|
static inline void bpf_test_probe_##call(void) \
|
||||||
{ \
|
{ \
|
||||||
check_trace_callback_type_##call(__bpf_trace_##template); \
|
check_trace_callback_type_##call(__bpf_trace_##template); \
|
||||||
@@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \
|
|||||||
.tp = &__tracepoint_##call, \
|
.tp = &__tracepoint_##call, \
|
||||||
.bpf_func = (void *)__bpf_trace_##template, \
|
.bpf_func = (void *)__bpf_trace_##template, \
|
||||||
.num_args = COUNT_ARGS(args), \
|
.num_args = COUNT_ARGS(args), \
|
||||||
|
.writable_size = size, \
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define FIRST(x, ...) x
|
||||||
|
|
||||||
|
#undef DEFINE_EVENT_WRITABLE
|
||||||
|
#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \
|
||||||
|
static inline void bpf_test_buffer_##call(void) \
|
||||||
|
{ \
|
||||||
|
/* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
|
||||||
|
* BUILD_BUG_ON_ZERO() uses a different mechanism that is not \
|
||||||
|
* dead-code-eliminated. \
|
||||||
|
*/ \
|
||||||
|
FIRST(proto); \
|
||||||
|
(void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \
|
||||||
|
} \
|
||||||
|
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
|
||||||
|
|
||||||
|
#undef DEFINE_EVENT
|
||||||
|
#define DEFINE_EVENT(template, call, proto, args) \
|
||||||
|
__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
|
||||||
|
|
||||||
#undef DEFINE_EVENT_PRINT
|
#undef DEFINE_EVENT_PRINT
|
||||||
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
|
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
|
||||||
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
|
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
|
||||||
|
|
||||||
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
|
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
|
||||||
|
|
||||||
|
#undef DEFINE_EVENT_WRITABLE
|
||||||
|
#undef __DEFINE_EVENT
|
||||||
|
#undef FIRST
|
||||||
|
|
||||||
#endif /* CONFIG_BPF_EVENTS */
|
#endif /* CONFIG_BPF_EVENTS */
|
||||||
|
|||||||
50
include/trace/events/bpf_test_run.h
Normal file
50
include/trace/events/bpf_test_run.h
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
#undef TRACE_SYSTEM
|
||||||
|
#define TRACE_SYSTEM bpf_test_run
|
||||||
|
|
||||||
|
#if !defined(_TRACE_BPF_TEST_RUN_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||||
|
#define _TRACE_BPF_TEST_RUN_H
|
||||||
|
|
||||||
|
#include <linux/tracepoint.h>
|
||||||
|
|
||||||
|
DECLARE_EVENT_CLASS(bpf_test_finish,
|
||||||
|
|
||||||
|
TP_PROTO(int *err),
|
||||||
|
|
||||||
|
TP_ARGS(err),
|
||||||
|
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(int, err)
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->err = *err;
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_printk("bpf_test_finish with err=%d", __entry->err)
|
||||||
|
);
|
||||||
|
|
||||||
|
#ifdef DEFINE_EVENT_WRITABLE
|
||||||
|
#undef BPF_TEST_RUN_DEFINE_EVENT
|
||||||
|
#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \
|
||||||
|
DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \
|
||||||
|
PARAMS(args), size)
|
||||||
|
#else
|
||||||
|
#undef BPF_TEST_RUN_DEFINE_EVENT
|
||||||
|
#define BPF_TEST_RUN_DEFINE_EVENT(template, call, proto, args, size) \
|
||||||
|
DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
BPF_TEST_RUN_DEFINE_EVENT(bpf_test_finish, bpf_test_finish,
|
||||||
|
|
||||||
|
TP_PROTO(int *err),
|
||||||
|
|
||||||
|
TP_ARGS(err),
|
||||||
|
|
||||||
|
sizeof(int)
|
||||||
|
);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This part must be outside protection */
|
||||||
|
#include <trace/define_trace.h>
|
||||||
107
include/trace/events/nbd.h
Normal file
107
include/trace/events/nbd.h
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
#undef TRACE_SYSTEM
|
||||||
|
#define TRACE_SYSTEM nbd
|
||||||
|
|
||||||
|
#if !defined(_TRACE_NBD_H) || defined(TRACE_HEADER_MULTI_READ)
|
||||||
|
#define _TRACE_NBD_H
|
||||||
|
|
||||||
|
#include <linux/tracepoint.h>
|
||||||
|
|
||||||
|
DECLARE_EVENT_CLASS(nbd_transport_event,
|
||||||
|
|
||||||
|
TP_PROTO(struct request *req, u64 handle),
|
||||||
|
|
||||||
|
TP_ARGS(req, handle),
|
||||||
|
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(struct request *, req)
|
||||||
|
__field(u64, handle)
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->req = req;
|
||||||
|
__entry->handle = handle;
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_printk(
|
||||||
|
"nbd transport event: request %p, handle 0x%016llx",
|
||||||
|
__entry->req,
|
||||||
|
__entry->handle
|
||||||
|
)
|
||||||
|
);
|
||||||
|
|
||||||
|
DEFINE_EVENT(nbd_transport_event, nbd_header_sent,
|
||||||
|
|
||||||
|
TP_PROTO(struct request *req, u64 handle),
|
||||||
|
|
||||||
|
TP_ARGS(req, handle)
|
||||||
|
);
|
||||||
|
|
||||||
|
DEFINE_EVENT(nbd_transport_event, nbd_payload_sent,
|
||||||
|
|
||||||
|
TP_PROTO(struct request *req, u64 handle),
|
||||||
|
|
||||||
|
TP_ARGS(req, handle)
|
||||||
|
);
|
||||||
|
|
||||||
|
DEFINE_EVENT(nbd_transport_event, nbd_header_received,
|
||||||
|
|
||||||
|
TP_PROTO(struct request *req, u64 handle),
|
||||||
|
|
||||||
|
TP_ARGS(req, handle)
|
||||||
|
);
|
||||||
|
|
||||||
|
DEFINE_EVENT(nbd_transport_event, nbd_payload_received,
|
||||||
|
|
||||||
|
TP_PROTO(struct request *req, u64 handle),
|
||||||
|
|
||||||
|
TP_ARGS(req, handle)
|
||||||
|
);
|
||||||
|
|
||||||
|
DECLARE_EVENT_CLASS(nbd_send_request,
|
||||||
|
|
||||||
|
TP_PROTO(struct nbd_request *nbd_request, int index,
|
||||||
|
struct request *rq),
|
||||||
|
|
||||||
|
TP_ARGS(nbd_request, index, rq),
|
||||||
|
|
||||||
|
TP_STRUCT__entry(
|
||||||
|
__field(struct nbd_request *, nbd_request)
|
||||||
|
__field(u64, dev_index)
|
||||||
|
__field(struct request *, request)
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_fast_assign(
|
||||||
|
__entry->nbd_request = 0;
|
||||||
|
__entry->dev_index = index;
|
||||||
|
__entry->request = rq;
|
||||||
|
),
|
||||||
|
|
||||||
|
TP_printk("nbd%lld: request %p", __entry->dev_index, __entry->request)
|
||||||
|
);
|
||||||
|
|
||||||
|
#ifdef DEFINE_EVENT_WRITABLE
|
||||||
|
#undef NBD_DEFINE_EVENT
|
||||||
|
#define NBD_DEFINE_EVENT(template, call, proto, args, size) \
|
||||||
|
DEFINE_EVENT_WRITABLE(template, call, PARAMS(proto), \
|
||||||
|
PARAMS(args), size)
|
||||||
|
#else
|
||||||
|
#undef NBD_DEFINE_EVENT
|
||||||
|
#define NBD_DEFINE_EVENT(template, call, proto, args, size) \
|
||||||
|
DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
NBD_DEFINE_EVENT(nbd_send_request, nbd_send_request,
|
||||||
|
|
||||||
|
TP_PROTO(struct nbd_request *nbd_request, int index,
|
||||||
|
struct request *rq),
|
||||||
|
|
||||||
|
TP_ARGS(nbd_request, index, rq),
|
||||||
|
|
||||||
|
sizeof(struct nbd_request)
|
||||||
|
);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* This part must be outside protection */
|
||||||
|
#include <trace/define_trace.h>
|
||||||
@@ -168,6 +168,7 @@ enum bpf_prog_type {
|
|||||||
BPF_PROG_TYPE_SK_REUSEPORT,
|
BPF_PROG_TYPE_SK_REUSEPORT,
|
||||||
BPF_PROG_TYPE_FLOW_DISSECTOR,
|
BPF_PROG_TYPE_FLOW_DISSECTOR,
|
||||||
BPF_PROG_TYPE_CGROUP_SYSCTL,
|
BPF_PROG_TYPE_CGROUP_SYSCTL,
|
||||||
|
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum bpf_attach_type {
|
enum bpf_attach_type {
|
||||||
|
|||||||
@@ -1789,12 +1789,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
|
|||||||
}
|
}
|
||||||
raw_tp->btp = btp;
|
raw_tp->btp = btp;
|
||||||
|
|
||||||
prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
|
prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
|
||||||
BPF_PROG_TYPE_RAW_TRACEPOINT);
|
|
||||||
if (IS_ERR(prog)) {
|
if (IS_ERR(prog)) {
|
||||||
err = PTR_ERR(prog);
|
err = PTR_ERR(prog);
|
||||||
goto out_free_tp;
|
goto out_free_tp;
|
||||||
}
|
}
|
||||||
|
if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
|
||||||
|
prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
|
||||||
|
err = -EINVAL;
|
||||||
|
goto out_put_prog;
|
||||||
|
}
|
||||||
|
|
||||||
err = bpf_probe_register(raw_tp->btp, prog);
|
err = bpf_probe_register(raw_tp->btp, prog);
|
||||||
if (err)
|
if (err)
|
||||||
|
|||||||
@@ -405,6 +405,7 @@ static const char * const reg_type_str[] = {
|
|||||||
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
|
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
|
||||||
[PTR_TO_TCP_SOCK] = "tcp_sock",
|
[PTR_TO_TCP_SOCK] = "tcp_sock",
|
||||||
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
|
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
|
||||||
|
[PTR_TO_TP_BUFFER] = "tp_buffer",
|
||||||
};
|
};
|
||||||
|
|
||||||
static char slot_type_char[] = {
|
static char slot_type_char[] = {
|
||||||
@@ -1993,6 +1994,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env,
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int check_tp_buffer_access(struct bpf_verifier_env *env,
|
||||||
|
const struct bpf_reg_state *reg,
|
||||||
|
int regno, int off, int size)
|
||||||
|
{
|
||||||
|
if (off < 0) {
|
||||||
|
verbose(env,
|
||||||
|
"R%d invalid tracepoint buffer access: off=%d, size=%d",
|
||||||
|
regno, off, size);
|
||||||
|
return -EACCES;
|
||||||
|
}
|
||||||
|
if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
|
||||||
|
char tn_buf[48];
|
||||||
|
|
||||||
|
tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
|
||||||
|
verbose(env,
|
||||||
|
"R%d invalid variable buffer offset: off=%d, var_off=%s",
|
||||||
|
regno, off, tn_buf);
|
||||||
|
return -EACCES;
|
||||||
|
}
|
||||||
|
if (off + size > env->prog->aux->max_tp_access)
|
||||||
|
env->prog->aux->max_tp_access = off + size;
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* truncate register to smaller size (in bytes)
|
/* truncate register to smaller size (in bytes)
|
||||||
* must be called with size < BPF_REG_SIZE
|
* must be called with size < BPF_REG_SIZE
|
||||||
*/
|
*/
|
||||||
@@ -2137,6 +2164,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
|
|||||||
err = check_sock_access(env, insn_idx, regno, off, size, t);
|
err = check_sock_access(env, insn_idx, regno, off, size, t);
|
||||||
if (!err && value_regno >= 0)
|
if (!err && value_regno >= 0)
|
||||||
mark_reg_unknown(env, regs, value_regno);
|
mark_reg_unknown(env, regs, value_regno);
|
||||||
|
} else if (reg->type == PTR_TO_TP_BUFFER) {
|
||||||
|
err = check_tp_buffer_access(env, reg, regno, off, size);
|
||||||
|
if (!err && t == BPF_READ && value_regno >= 0)
|
||||||
|
mark_reg_unknown(env, regs, value_regno);
|
||||||
} else {
|
} else {
|
||||||
verbose(env, "R%d invalid mem access '%s'\n", regno,
|
verbose(env, "R%d invalid mem access '%s'\n", regno,
|
||||||
reg_type_str[reg->type]);
|
reg_type_str[reg->type]);
|
||||||
|
|||||||
@@ -915,6 +915,27 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
|
|||||||
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
|
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static bool raw_tp_writable_prog_is_valid_access(int off, int size,
|
||||||
|
enum bpf_access_type type,
|
||||||
|
const struct bpf_prog *prog,
|
||||||
|
struct bpf_insn_access_aux *info)
|
||||||
|
{
|
||||||
|
if (off == 0) {
|
||||||
|
if (size != sizeof(u64) || type != BPF_READ)
|
||||||
|
return false;
|
||||||
|
info->reg_type = PTR_TO_TP_BUFFER;
|
||||||
|
}
|
||||||
|
return raw_tp_prog_is_valid_access(off, size, type, prog, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
|
||||||
|
.get_func_proto = raw_tp_prog_func_proto,
|
||||||
|
.is_valid_access = raw_tp_writable_prog_is_valid_access,
|
||||||
|
};
|
||||||
|
|
||||||
|
const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
|
||||||
|
};
|
||||||
|
|
||||||
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
|
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
|
||||||
const struct bpf_prog *prog,
|
const struct bpf_prog *prog,
|
||||||
struct bpf_insn_access_aux *info)
|
struct bpf_insn_access_aux *info)
|
||||||
@@ -1204,6 +1225,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
|
|||||||
if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
|
if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (prog->aux->max_tp_access > btp->writable_size)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
|
return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -13,6 +13,9 @@
|
|||||||
#include <net/sock.h>
|
#include <net/sock.h>
|
||||||
#include <net/tcp.h>
|
#include <net/tcp.h>
|
||||||
|
|
||||||
|
#define CREATE_TRACE_POINTS
|
||||||
|
#include <trace/events/bpf_test_run.h>
|
||||||
|
|
||||||
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
|
static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
|
||||||
u32 *retval, u32 *time)
|
u32 *retval, u32 *time)
|
||||||
{
|
{
|
||||||
@@ -100,6 +103,7 @@ static int bpf_test_finish(const union bpf_attr *kattr,
|
|||||||
if (err != -ENOSPC)
|
if (err != -ENOSPC)
|
||||||
err = 0;
|
err = 0;
|
||||||
out:
|
out:
|
||||||
|
trace_bpf_test_finish(&err);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -168,6 +168,7 @@ enum bpf_prog_type {
|
|||||||
BPF_PROG_TYPE_SK_REUSEPORT,
|
BPF_PROG_TYPE_SK_REUSEPORT,
|
||||||
BPF_PROG_TYPE_FLOW_DISSECTOR,
|
BPF_PROG_TYPE_FLOW_DISSECTOR,
|
||||||
BPF_PROG_TYPE_CGROUP_SYSCTL,
|
BPF_PROG_TYPE_CGROUP_SYSCTL,
|
||||||
|
BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum bpf_attach_type {
|
enum bpf_attach_type {
|
||||||
@@ -1737,12 +1738,19 @@ union bpf_attr {
|
|||||||
* error if an eBPF program tries to set a callback that is not
|
* error if an eBPF program tries to set a callback that is not
|
||||||
* supported in the current kernel.
|
* supported in the current kernel.
|
||||||
*
|
*
|
||||||
* The supported callback values that *argval* can combine are:
|
* *argval* is a flag array which can combine these flags:
|
||||||
*
|
*
|
||||||
* * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
|
* * **BPF_SOCK_OPS_RTO_CB_FLAG** (retransmission time out)
|
||||||
* * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
|
* * **BPF_SOCK_OPS_RETRANS_CB_FLAG** (retransmission)
|
||||||
* * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
|
* * **BPF_SOCK_OPS_STATE_CB_FLAG** (TCP state change)
|
||||||
*
|
*
|
||||||
|
* Therefore, this function can be used to clear a callback flag by
|
||||||
|
* setting the appropriate bit to zero. e.g. to disable the RTO
|
||||||
|
* callback:
|
||||||
|
*
|
||||||
|
* **bpf_sock_ops_cb_flags_set(bpf_sock,**
|
||||||
|
* **bpf_sock->bpf_sock_ops_cb_flags & ~BPF_SOCK_OPS_RTO_CB_FLAG)**
|
||||||
|
*
|
||||||
* Here are some examples of where one could call such eBPF
|
* Here are some examples of where one could call such eBPF
|
||||||
* program:
|
* program:
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -2136,6 +2136,7 @@ static bool bpf_prog_type__needs_kver(enum bpf_prog_type type)
|
|||||||
case BPF_PROG_TYPE_UNSPEC:
|
case BPF_PROG_TYPE_UNSPEC:
|
||||||
case BPF_PROG_TYPE_TRACEPOINT:
|
case BPF_PROG_TYPE_TRACEPOINT:
|
||||||
case BPF_PROG_TYPE_RAW_TRACEPOINT:
|
case BPF_PROG_TYPE_RAW_TRACEPOINT:
|
||||||
|
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
|
||||||
case BPF_PROG_TYPE_PERF_EVENT:
|
case BPF_PROG_TYPE_PERF_EVENT:
|
||||||
case BPF_PROG_TYPE_CGROUP_SYSCTL:
|
case BPF_PROG_TYPE_CGROUP_SYSCTL:
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@@ -93,6 +93,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,
|
|||||||
case BPF_PROG_TYPE_CGROUP_DEVICE:
|
case BPF_PROG_TYPE_CGROUP_DEVICE:
|
||||||
case BPF_PROG_TYPE_SK_MSG:
|
case BPF_PROG_TYPE_SK_MSG:
|
||||||
case BPF_PROG_TYPE_RAW_TRACEPOINT:
|
case BPF_PROG_TYPE_RAW_TRACEPOINT:
|
||||||
|
case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
|
||||||
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
|
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
|
||||||
case BPF_PROG_TYPE_LIRC_MODE2:
|
case BPF_PROG_TYPE_LIRC_MODE2:
|
||||||
case BPF_PROG_TYPE_SK_REUSEPORT:
|
case BPF_PROG_TYPE_SK_REUSEPORT:
|
||||||
|
|||||||
@@ -0,0 +1,42 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
#include <test_progs.h>
|
||||||
|
#include <linux/nbd.h>
|
||||||
|
|
||||||
|
void test_raw_tp_writable_reject_nbd_invalid(void)
|
||||||
|
{
|
||||||
|
__u32 duration = 0;
|
||||||
|
char error[4096];
|
||||||
|
int bpf_fd = -1, tp_fd = -1;
|
||||||
|
|
||||||
|
const struct bpf_insn program[] = {
|
||||||
|
/* r6 is our tp buffer */
|
||||||
|
BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
|
||||||
|
/* one byte beyond the end of the nbd_request struct */
|
||||||
|
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6,
|
||||||
|
sizeof(struct nbd_request)),
|
||||||
|
BPF_EXIT_INSN(),
|
||||||
|
};
|
||||||
|
|
||||||
|
struct bpf_load_program_attr load_attr = {
|
||||||
|
.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
|
||||||
|
.license = "GPL v2",
|
||||||
|
.insns = program,
|
||||||
|
.insns_cnt = sizeof(program) / sizeof(struct bpf_insn),
|
||||||
|
.log_level = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
|
||||||
|
if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load",
|
||||||
|
"failed: %d errno %d\n", bpf_fd, errno))
|
||||||
|
return;
|
||||||
|
|
||||||
|
tp_fd = bpf_raw_tracepoint_open("nbd_send_request", bpf_fd);
|
||||||
|
if (CHECK(tp_fd >= 0, "bpf_raw_tracepoint_writable open",
|
||||||
|
"erroneously succeeded\n"))
|
||||||
|
goto out_bpffd;
|
||||||
|
|
||||||
|
close(tp_fd);
|
||||||
|
out_bpffd:
|
||||||
|
close(bpf_fd);
|
||||||
|
}
|
||||||
@@ -0,0 +1,80 @@
|
|||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
#include <test_progs.h>
|
||||||
|
#include <linux/nbd.h>
|
||||||
|
|
||||||
|
void test_raw_tp_writable_test_run(void)
|
||||||
|
{
|
||||||
|
__u32 duration = 0;
|
||||||
|
char error[4096];
|
||||||
|
|
||||||
|
const struct bpf_insn trace_program[] = {
|
||||||
|
BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
|
||||||
|
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
|
||||||
|
BPF_MOV64_IMM(BPF_REG_0, 42),
|
||||||
|
BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_0, 0),
|
||||||
|
BPF_EXIT_INSN(),
|
||||||
|
};
|
||||||
|
|
||||||
|
struct bpf_load_program_attr load_attr = {
|
||||||
|
.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
|
||||||
|
.license = "GPL v2",
|
||||||
|
.insns = trace_program,
|
||||||
|
.insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
|
||||||
|
.log_level = 2,
|
||||||
|
};
|
||||||
|
|
||||||
|
int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
|
||||||
|
if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded",
|
||||||
|
"failed: %d errno %d\n", bpf_fd, errno))
|
||||||
|
return;
|
||||||
|
|
||||||
|
const struct bpf_insn skb_program[] = {
|
||||||
|
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||||
|
BPF_EXIT_INSN(),
|
||||||
|
};
|
||||||
|
|
||||||
|
struct bpf_load_program_attr skb_load_attr = {
|
||||||
|
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
|
||||||
|
.license = "GPL v2",
|
||||||
|
.insns = skb_program,
|
||||||
|
.insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
|
||||||
|
};
|
||||||
|
|
||||||
|
int filter_fd =
|
||||||
|
bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
|
||||||
|
if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
|
||||||
|
filter_fd, errno))
|
||||||
|
goto out_bpffd;
|
||||||
|
|
||||||
|
int tp_fd = bpf_raw_tracepoint_open("bpf_test_finish", bpf_fd);
|
||||||
|
if (CHECK(tp_fd < 0, "bpf_raw_tracepoint_writable opened",
|
||||||
|
"failed: %d errno %d\n", tp_fd, errno))
|
||||||
|
goto out_filterfd;
|
||||||
|
|
||||||
|
char test_skb[128] = {
|
||||||
|
0,
|
||||||
|
};
|
||||||
|
|
||||||
|
__u32 prog_ret;
|
||||||
|
int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
|
||||||
|
0, &prog_ret, 0);
|
||||||
|
CHECK(err != 42, "test_run",
|
||||||
|
"tracepoint did not modify return value\n");
|
||||||
|
CHECK(prog_ret != 0, "test_run_ret",
|
||||||
|
"socket_filter did not return 0\n");
|
||||||
|
|
||||||
|
close(tp_fd);
|
||||||
|
|
||||||
|
err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0,
|
||||||
|
&prog_ret, 0);
|
||||||
|
CHECK(err != 0, "test_run_notrace",
|
||||||
|
"test_run failed with %d errno %d\n", err, errno);
|
||||||
|
CHECK(prog_ret != 0, "test_run_ret_notrace",
|
||||||
|
"socket_filter did not return 0\n");
|
||||||
|
|
||||||
|
out_filterfd:
|
||||||
|
close(filter_fd);
|
||||||
|
out_bpffd:
|
||||||
|
close(bpf_fd);
|
||||||
|
}
|
||||||
34
tools/testing/selftests/bpf/verifier/raw_tp_writable.c
Normal file
34
tools/testing/selftests/bpf/verifier/raw_tp_writable.c
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
{
|
||||||
|
"raw_tracepoint_writable: reject variable offset",
|
||||||
|
.insns = {
|
||||||
|
/* r6 is our tp buffer */
|
||||||
|
BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
|
||||||
|
|
||||||
|
BPF_LD_MAP_FD(BPF_REG_1, 0),
|
||||||
|
/* move the key (== 0) to r10-8 */
|
||||||
|
BPF_MOV32_IMM(BPF_REG_0, 0),
|
||||||
|
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
|
||||||
|
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
|
||||||
|
BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
|
||||||
|
/* lookup in the map */
|
||||||
|
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
|
||||||
|
BPF_FUNC_map_lookup_elem),
|
||||||
|
|
||||||
|
/* exit clean if null */
|
||||||
|
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
|
||||||
|
BPF_EXIT_INSN(),
|
||||||
|
|
||||||
|
/* shift the buffer pointer to a variable location */
|
||||||
|
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
|
||||||
|
BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0),
|
||||||
|
/* clobber whatever's there */
|
||||||
|
BPF_MOV64_IMM(BPF_REG_7, 4242),
|
||||||
|
BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0),
|
||||||
|
|
||||||
|
BPF_MOV64_IMM(BPF_REG_0, 0),
|
||||||
|
BPF_EXIT_INSN(),
|
||||||
|
},
|
||||||
|
.fixup_map_hash_8b = { 1, },
|
||||||
|
.prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
|
||||||
|
.errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)",
|
||||||
|
},
|
||||||
Reference in New Issue
Block a user