【ebpf】first try2

虚拟机基本环境信息

1
2
3
4
5
6
7
8
9
10
11
12
13
[root@ubuntu-22 ~]# uname -a
Linux ubuntu-22 5.15.0-58-generic #64-Ubuntu SMP Thu Jan 5 11:43:13 UTC 2023 x86_64 x86_64 x86_64 GNU/Linux

[root@ubuntu-22 ~]# uname -r
5.15.0-58-generic

[root@ubuntu-22 ~]# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 22.04.1 LTS
Release: 22.04
Codename: jammy

搭建ubuntu虚拟机可以看到我的另一篇教程:
# 【网络成长记】VMware16.2.2中创建ubuntu22.04.1虚拟机+修改静态ip+换源+mobaxterm远程连接+修改命令提示符颜色

安装 eBPF 开发和运行所需要的开发工具

1
[root@ubuntu-22 ~]# sudo apt-get install -y make clang llvm libelf-dev libbpf-dev bpfcc-tools libbpfcc-dev linux-tools-$(uname -r) linux-headers-$(uname -r)

ebpf的CO-RE特性前提条件

在使用 CO-RE 之前,内核需要开启 CONFIG_DEBUG_INFO_BTF=y 和 CONFIG_DEBUG_INFO=y 这两个编译选项。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
[root@ubuntu-22 ~]# cat /boot/config-5.15.0-58-generic  |grep -i config_debug_info
CONFIG_DEBUG_INFO=y
# CONFIG_DEBUG_INFO_REDUCED is not set
# CONFIG_DEBUG_INFO_COMPRESSED is not set
# CONFIG_DEBUG_INFO_SPLIT is not set
# CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT is not set
# CONFIG_DEBUG_INFO_DWARF4 is not set
CONFIG_DEBUG_INFO_DWARF5=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_BTF_MODULES=y
[root@ubuntu-22 ~]#
[root@ubuntu-22 ~]# ll /sys/kernel/btf/vmlinux
-r--r--r-- 1 root root 5178563 1月 29 16:02 /sys/kernel/btf/vmlinux

第一个ebpf程序:跟踪 openat()(即打开文件)系统调用

第一步:使用 C 开发一个 eBPF 程序

1
2
3
4
5
6
7
8
9
[root@ubuntu-22 geektime]# cat hello.c
/*
 * Minimal BCC eBPF handler: writes a fixed greeting to the kernel trace
 * buffer every time it is invoked and then returns 0.
 */
int hello_world(void *ctx)
{
    /*
     * bpf_trace_printk() is the most commonly used BPF helper for printing
     * a string. Because eBPF runs inside the kernel, the text does not go
     * to stdout; it shows up in the kernel debug file
     * /sys/kernel/debug/tracing/trace_pipe.
     */
    bpf_trace_printk("Hello, World!");

    return 0;
}

// eBPF 程序并不能随意调用内核函数,必须通过辅助函数才能完成 eBPF 程序和其他内核模块的交互,例如 bpf_trace_printk()

第二步:使用 Python 和 BCC 库开发一个用户态程序

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
[root@ubuntu-22 geektime]# cat hello.py
#!/usr/bin/env python3
"""User-space loader for hello.c: compile it, attach it to a kprobe, print its trace output."""

# Step 1: bring in the BPF class from the BCC library.
from bcc import BPF

# Step 2: compile and load the BPF C source written in the previous step.
bpf_program = BPF(src_file="hello.c")

# Step 3: bind the hello_world handler to a kernel probe (kprobe).
# do_sys_openat2() is the in-kernel implementation of the openat() system call,
# so the handler is triggered on that kernel tracing event.
bpf_program.attach_kprobe(
    event="do_sys_openat2",
    fn_name="hello_world",
)

# Step 4: read /sys/kernel/debug/tracing/trace_pipe and echo its contents
# to standard output.
bpf_program.trace_print()


# 内核函数 do_sys_openat2

# 理解:openat() 和 openat2() 都是系统调用
# 在计算机中运行程序、读写文件,都会涉及到文件的打开操作,Linux v5.10 与文件打开相关的系统调用有 open() / creat() / openat() / openat2() 这四类,在使用 glibc v2.32 时,几乎所有的文件打开操作使用的都是 openat() 这个系统调用,而它在内核中由 do_sys_openat2() 实现。

# 用高级语言开发的 eBPF 程序,需要首先编译为 BPF 字节码,然后借助 bpf 系统调用加载到内核中,最后再通过性能监控等接口与具体的内核事件进行绑定。这样,内核的性能监控模块才会在内核事件发生时,自动执行我们开发的 eBPF 程序。

第三步:执行 eBPF 程序

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
[root@ubuntu-22 geektime]# sudo python3 hello.py
In file included from <built-in>:2:
In file included from /virtual/include/bcc/bpf.h:12:
In file included from include/linux/types.h:6:
In file included from include/uapi/linux/types.h:14:
In file included from include/uapi/linux/posix_types.h:5:
In file included from include/linux/stddef.h:5:
In file included from include/uapi/linux/stddef.h:5:
In file included from include/linux/compiler_types.h:80:
include/linux/compiler-clang.h:41:9: warning: '__HAVE_BUILTIN_BSWAP32__' macro redefined [-Wmacro-redefined]
#define __HAVE_BUILTIN_BSWAP32__
^
<command line>:4:9: note: previous definition is here
#define __HAVE_BUILTIN_BSWAP32__ 1
^
In file included from <built-in>:2:
In file included from /virtual/include/bcc/bpf.h:12:
In file included from include/linux/types.h:6:
In file included from include/uapi/linux/types.h:14:
In file included from include/uapi/linux/posix_types.h:5:
In file included from include/linux/stddef.h:5:
In file included from include/uapi/linux/stddef.h:5:
In file included from include/linux/compiler_types.h:80:
include/linux/compiler-clang.h:42:9: warning: '__HAVE_BUILTIN_BSWAP64__' macro redefined [-Wmacro-redefined]
#define __HAVE_BUILTIN_BSWAP64__
^
<command line>:5:9: note: previous definition is here
#define __HAVE_BUILTIN_BSWAP64__ 1
^
In file included from <built-in>:2:
In file included from /virtual/include/bcc/bpf.h:12:
In file included from include/linux/types.h:6:
In file included from include/uapi/linux/types.h:14:
In file included from include/uapi/linux/posix_types.h:5:
In file included from include/linux/stddef.h:5:
In file included from include/uapi/linux/stddef.h:5:
In file included from include/linux/compiler_types.h:80:
include/linux/compiler-clang.h:43:9: warning: '__HAVE_BUILTIN_BSWAP16__' macro redefined [-Wmacro-redefined]
#define __HAVE_BUILTIN_BSWAP16__
^
<command line>:3:9: note: previous definition is here
#define __HAVE_BUILTIN_BSWAP16__ 1
^
3 warnings generated.
b' systemd-oomd-636 [001] d...1 7270.693212: bpf_trace_printk: Hello, World!'
b' systemd-oomd-636 [001] d...1 7270.693407: bpf_trace_printk: Hello, World!'
b' head-43243 [000] d...1 7271.246610: bpf_trace_printk: Hello, World!'
b' head-43243 [000] d...1 7271.246653: bpf_trace_printk: Hello, World!'
b' head-43243 [000] d...1 7271.246911: bpf_trace_printk: Hello, World!'
b' head-43243 [000] d...1 7271.247487: bpf_trace_printk: Hello, World!'
b' head-43243 [000] d...1 7271.247529: bpf_trace_printk: Hello, World!'
b' <...>-43244 [000] d...1 7271.248757: bpf_trace_printk: Hello, World!'
b' <...>-43244 [000] d...1 7271.248848: bpf_trace_printk: Hello, World!'
b' <...>-43245 [000] d...1 7271.249767: bpf_trace_printk: Hello, World!'
b' <...>-43245 [000] d...1 7271.249825: bpf_trace_printk: Hello, World!'
b' who-43246 [000] d...1 7271.251964: bpf_trace_printk: Hello, World!'
b' who-43246 [000] d...1 7271.252028: bpf_trace_printk: Hello, World!'
b' <...>-43247 [000] d...1 7271.253499: bpf_trace_printk: Hello, World!'
b' <...>-43247 [000] d...1 7271.253554: bpf_trace_printk: Hello, World!'
b' systemd-oomd-636 [001] d...1 7271.943581: bpf_trace_printk: Hello, World!'
b' systemd-oomd-636 [001] d...1 7272.943654: bpf_trace_printk: Hello, World!'
b' head-43255 [000] d...1 7273.267842: bpf_trace_printk: Hello, World!'
b' head-43255 [000] d...1 7273.268730: bpf_trace_printk: Hello, World!'
b' tail-43256 [000] d...1 7273.272320: bpf_trace_printk: Hello, World!'
b' tail-43256 [000] d...1 7273.272921: bpf_trace_printk: Hello, World!'
b' tail-43256 [000] d...1 7273.273573: bpf_trace_printk: Hello, World!'
b' tail-43256 [000] d...1 7273.274131: bpf_trace_printk: Hello, World!'


改进

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
[root@ubuntu-22 geektime]# cat trace-open.c
// 包含头文件
#include <uapi/linux/openat2.h>
#include <linux/sched.h>

// Event record filled in by the kprobe handler and handed to user space
// through the "events" perf output below.
struct data_t {
// Id taken from bpf_get_current_pid_tgid() in the handler.
u32 pid;
// Timestamp from bpf_ktime_get_ns(), in nanoseconds.
u64 ts;
// Name of the current task, filled by bpf_get_current_comm().
char comm[TASK_COMM_LEN];
// File name passed to the traced call; the buffer holds at most NAME_MAX bytes.
char fname[NAME_MAX];
};

// Declare a BPF map of the perf-event type named "events"; the handler
// submits data_t records to it with events.perf_submit().
BPF_PERF_OUTPUT(events);



/*
 * kprobe handler for do_sys_openat2().
 *
 * Collects the calling process id, a timestamp, the task name and the file
 * name being opened into a data_t record and submits it to the "events"
 * perf output.
 *
 * ctx      - register context supplied by the kprobe
 * dfd      - directory file descriptor argument of the traced function (unused)
 * filename - user-space pointer to the NUL-terminated path being opened
 * how      - open_how argument of the traced function (unused)
 *
 * Always returns 0.
 */
int hello_world(struct pt_regs *ctx, int dfd, const char __user *filename, struct open_how *how)
{
    struct data_t data = {};

    /*
     * bpf_get_current_pid_tgid() returns (TGID << 32) | PID. The upper
     * 32 bits (TGID) are what user space calls the process id; the lower
     * 32 bits are the per-thread id. Shift so that data.pid really is the
     * process id, also for multi-threaded programs.
     */
    data.pid = bpf_get_current_pid_tgid() >> 32;

    /* Time since system boot, in nanoseconds. */
    data.ts = bpf_ktime_get_ns();

    /* Copy the task name; only read the file name if that succeeded. */
    if (bpf_get_current_comm(&data.comm, sizeof(data.comm)) == 0) {
        /*
         * filename points into user memory and is a NUL-terminated string,
         * so use the user-space string helper: it stops at the terminator
         * instead of always copying sizeof(data.fname) bytes. Paths longer
         * than the buffer are truncated.
         */
        bpf_probe_read_user_str(&data.fname, sizeof(data.fname), (void *)filename);
    }

    /* Hand the record to user space through the perf-event map. */
    events.perf_submit(ctx, &data, sizeof(data));
    return 0;
}

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
[root@ubuntu-22 geektime]# cat trace-open.py
from bcc import BPF

# 1) Compile trace-open.c and attach its hello_world handler to the
#    do_sys_openat2 kernel probe.
bpf = BPF(src_file="trace-open.c")
bpf.attach_kprobe(event="do_sys_openat2", fn_name="hello_world")

# 2) Print a header line describing the columns that follow.
print("%-18s %-16s %-6s %-16s" % ("TIME(s)", "COMM", "PID", "FILE"))

# 3) Perf-event callback.
# Timestamp of the first event seen; 0 means "no event yet".
first_ts = 0


def print_event(cpu, data, size):
    """Print one event: seconds since the first event, task name, pid, file name."""
    global first_ts
    event = bpf["events"].event(data)
    if first_ts == 0:
        first_ts = event.ts
    elapsed_s = float(event.ts - first_ts) / 1000000000
    row = (elapsed_s, event.comm, event.pid, event.fname)
    print("%-18.9f %-16s %-6d %-16s" % row)


# 4) Register print_event on the "events" perf buffer, then poll it until
#    the user presses Ctrl-C.
bpf["events"].open_perf_buffer(print_event)
try:
    while True:
        bpf.perf_buffer_poll()
except KeyboardInterrupt:
    exit()


# 怎样从用户态读取 BPF 映射内容并输出到标准输出(stdout)呢?
# 在 BCC 中,与 eBPF 程序中 BPF_PERF_OUTPUT 相对应的用户态辅助函数是 open_perf_buffer() 。它需要传入一个回调函数,用于处理从 Perf 事件类型的 BPF 映射中读取到的数据。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
[root@ubuntu-22 geektime]# sudo python3 trace-open.py
In file included from <built-in>:2:
In file included from /virtual/include/bcc/bpf.h:12:
In file included from include/linux/types.h:6:
In file included from include/uapi/linux/types.h:14:
In file included from include/uapi/linux/posix_types.h:5:
In file included from include/linux/stddef.h:5:
In file included from include/uapi/linux/stddef.h:5:
In file included from include/linux/compiler_types.h:80:
include/linux/compiler-clang.h:41:9: warning: '__HAVE_BUILTIN_BSWAP32__' macro redefined [-Wmacro-redefined]
#define __HAVE_BUILTIN_BSWAP32__
^
<command line>:4:9: note: previous definition is here
#define __HAVE_BUILTIN_BSWAP32__ 1
^
In file included from <built-in>:2:
In file included from /virtual/include/bcc/bpf.h:12:
In file included from include/linux/types.h:6:
In file included from include/uapi/linux/types.h:14:
In file included from include/uapi/linux/posix_types.h:5:
In file included from include/linux/stddef.h:5:
In file included from include/uapi/linux/stddef.h:5:
In file included from include/linux/compiler_types.h:80:
include/linux/compiler-clang.h:42:9: warning: '__HAVE_BUILTIN_BSWAP64__' macro redefined [-Wmacro-redefined]
#define __HAVE_BUILTIN_BSWAP64__
^
<command line>:5:9: note: previous definition is here
#define __HAVE_BUILTIN_BSWAP64__ 1
^
In file included from <built-in>:2:
In file included from /virtual/include/bcc/bpf.h:12:
In file included from include/linux/types.h:6:
In file included from include/uapi/linux/types.h:14:
In file included from include/uapi/linux/posix_types.h:5:
In file included from include/linux/stddef.h:5:
In file included from include/uapi/linux/stddef.h:5:
In file included from include/linux/compiler_types.h:80:
include/linux/compiler-clang.h:43:9: warning: '__HAVE_BUILTIN_BSWAP16__' macro redefined [-Wmacro-redefined]
#define __HAVE_BUILTIN_BSWAP16__
^
<command line>:3:9: note: previous definition is here
#define __HAVE_BUILTIN_BSWAP16__ 1
^
3 warnings generated.
TIME(s) COMM PID FILE
0.000000000 b'systemd-oomd' 636 b'/proc/meminfo'
0.249586834 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-0.slice/user@0.service/memory.pressure'
0.249744243 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-0.slice/user@0.service/memory.current'
0.249765844 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-0.slice/user@0.service/memory.min'
0.249783697 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-0.slice/user@0.service/memory.low'
0.249800399 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-0.slice/user@0.service/memory.swap.current'
0.249818063 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-0.slice/user@0.service/memory.stat'
0.249918042 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.pressure'
0.249993365 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.current'
0.250011319 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.min'
0.250028852 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.low'
0.250045734 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.swap.current'
0.250062576 b'systemd-oomd' 636 b'/sys/fs/cgroup/user.slice/user-1000.slice/user@1000.service/memory.stat'
0.250117370 b'systemd-oomd' 636 b'/proc/meminfo'
0.499947185 b'systemd-oomd' 636 b'/proc/meminfo'
0.537863286 b'head' 53569 b'/etc/ld.so.cache'
0.537898282 b'head' 53569 b'/lib/x86_64-linux-gnu/libc.so.6'
0.538648765 b'head' 53569 b'/usr/lib/locale/locale-archive'
0.538766849 b'head' 53569 b'/usr/share/locale/locale.alias'
0.538832774 b'head' 53569 b'/usr/share/locale/en_US/LC_MESSAGES/coreutils.mo'
0.538838585 b'head' 53569 b'/usr/share/locale/en/LC_MESSAGES/coreutils.mo'
0.538844537 b'head' 53569 b'/usr/share/locale-langpack/en_US/LC_MESSAGES/coreutils.mo'
0.538850207 b'head' 53569 b'/usr/share/locale-langpack/en/LC_MESSAGES/coreutils.mo'
0.538868322 b'head' 53569 b'/proc/meminfo'
0.539986006 b'head' 53570 b'/etc/ld.so.cache'
0.540310641 b'head' 53570 b'/lib/x86_64-linux-gnu/libc.so.6'
0.541208063 b'head' 53570 b'/usr/lib/locale/locale-archive'
0.541523040 b'head' 53570 b'/usr/share/locale/locale.alias'
0.541664007 b'head' 53570 b'/usr/share/locale/en_US/LC_MESSAGES/coreutils.mo'
0.541768956 b'head' 53570 b'/usr/share/locale/en/LC_MESSAGES/coreutils.mo'
0.541841113 b'head' 53570 b'/usr/share/locale-langpack/en_US/LC_MESSAGES/coreutils.mo'
0.541911206 b'head' 53570 b'/usr/share/locale-langpack/en/LC_MESSAGES/coreutils.mo'
0.541993672 b'head' 53570 b'/proc/stat'
0.542297589 b'head' 53570 b'/proc/version'
0.542427826 b'head' 53570 b'/proc/uptime'
0.542513568 b'head' 53570 b'/proc/loadavg'
0.542576598 b'head' 53570 b'/proc/sys/fs/file-nr'
0.542649275 b'head' 53570 b'/proc/sys/kernel/hostname'


【ebpf】first try2
http://example.com/2023/01/29/ebpf/【ebpf】first try2/
作者
ningan123
发布于
2023年1月29日
许可协议