目录

Operating System Chapter12 进程的地址空间


Operating System

$Nanjing\ University\rightarrow Yanyan\ Jiang\newline$

Overview

  • 复习

    • 操作系统:加载第一个 init 程序,随后变为 “异常处理程序”

    • init: fork, execve, exit 和其他系统调用创造整个操作系统世界

  • 本次课回答的问题

    • Q: 进程的地址空间是如何创建、如何更改的?
  • 本次课主要内容

    • 进程的地址空间和管理 (mmap)

进程的地址空间

进程的地址空间

  • char *p 可以和 intptr_t 互相转换

    • 可以指向 “任何地方”
    1
    2
    3
    4
    5
    6
    7
    
    #include <stdio.h>
    
    // Shows that a pointer variable can be loaded with any address value:
    // first the address of main's code, then an arbitrary integer constant.
    // Nothing is dereferenced here, so the program runs without faulting.
    int main() {
      char *p;
      p = (void *)main;            // address of the function main
      p = (void *)(0x12345678l);   // integer constant reinterpreted as an address
    }
    
    1
    2
    3
    4
    5
    6
    7
    8
    
    #include <stdio.h>
    
    // Points an unsigned pointer at main and prints the value read through it
    // in hexadecimal, i.e. the leading bytes of main's own machine code.
    int main() {
      unsigned *p;
      p = (void *)main;
      // p = (void *)(0x12345678l);
      printf("%x\n", *p); // => fa1e0ff3 (bytes f3 0f 1e fa read as one little-endian value)
    }
    
    • 反汇编
     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    
    0000000000001149 <main>:
        1149:       f3 0f 1e fa             endbr64 
        114d:       55                      push   %rbp
        114e:       48 89 e5                mov    %rsp,%rbp
        1151:       48 83 ec 10             sub    $0x10,%rsp
        1155:       48 8d 05 ed ff ff ff    lea    -0x13(%rip),%rax        # 1149 <main>
        115c:       48 89 45 f8             mov    %rax,-0x8(%rbp)
        1160:       48 8b 45 f8             mov    -0x8(%rbp),%rax
        1164:       8b 00                   mov    (%rax),%eax
        1166:       89 c6                   mov    %eax,%esi
        1168:       48 8d 05 95 0e 00 00    lea    0xe95(%rip),%rax        # 2004 <_IO_stdin_used+0x4>
        116f:       48 89 c7                mov    %rax,%rdi
        1172:       b8 00 00 00 00          mov    $0x0,%eax
        1177:       e8 d4 fe ff ff          call   1050 <printf@plt>
        117c:       b8 00 00 00 00          mov    $0x0,%eax
        1181:       c9                      leave  
        1182:       c3                      ret
    
    • 输出 fa1e0ff3 正好对应 endbr64 的机器码 f3 0f 1e fa(注意小端序:低地址字节在整数的低位,所以打印出来顺序相反)

    • 合法的地址 (可读或可写)

      • 代码 (main, %rip 会从此处取出待执行的指令),只读

      • 数据 (static int x),读写

      • 堆栈 (int y),读写

      • 运行时分配的内存 (???),读写

      • 动态链接库 (???)

    • 非法的地址

    1
    2
    3
    4
    5
    6
    7
    8
    
    #include <stdio.h>
    
    // Dereferences an arbitrary address (0x12345678) that was never obtained
    // from a real object; the read faults instead of printing a value.
    int main() {
      unsigned *p;
      // p = (void *)main;
      p = (void *)(0x12345678l);
      printf("%x\n", *p); // => zsh: segmentation fault
    }
    
    • NULL,导致 segmentation fault
  • 它们停留在概念中,但实际呢?

查看进程的地址空间

  • pmap (1) - report memory of a process

    • Claim: pmap 是通过访问 procfs (/proc/) 实现的

    • 如何验证这一点?

  • 查看进程的地址空间

    • minimal.S (静态链接)

    • 最小的 Hello World (静态/动态链接)

      • 进程的地址空间:若干连续的 “段”

      • “段” 的内存可以访问

      • 不在段内/违反权限的内存访问 触发 SIGSEGV

        • gdb 可以 “越权访问”,但不能访问 “不存在” 的地址
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
#include <sys/syscall.h>

// Minimal program that defines _start itself: it issues a raw write system
// call to print a colored greeting on fd 1, then a raw exit system call with
// status 1. The system call number goes in %rax and the arguments in
// %rdi, %rsi, %rdx.
.globl _start
_start:
  movq $SYS_write, %rax   // write(
  movq $1,         %rdi   //   fd=1,
  movq $st,        %rsi   //   buf=st,
  movq $(ed - st), %rdx   //   count=ed-st
  syscall                 // );

  movq $SYS_exit,  %rax   // exit(
  movq $1,         %rdi   //   status=1
  syscall                 // );

// Message bytes; the label ed marks their end so the length is ed - st.
st:
  .ascii "\033[01;31mHello, OS World\033[0m\n"
ed:
1
2
3
4
5
6
7
┌──(jungle㉿LAPTOP-A7S3TAA4)-[/mnt/d/work for vscode/chapter12]
└─$ strace ./minimal
execve("./minimal", ["./minimal"], 0x7fffdfe108e0 /* 41 vars */) = 0
write(1, "\33[01;31mHello, OS World\33[0m\n", 28Hello, OS World
) = 28
exit(1)                                 = ?
+++ exited with 1 +++
/img/Operating System/chapter12-1.png
查看进程号
/img/Operating System/chapter12-2.png
使用pmap查看一个进程所有的地址空间
  • 可以看到由许多“段”组成的
  • 仔细看可以发现第一段的段地址和starti首地址的联系
/img/Operating System/chapter12-3.png
layout src
  • pmap干了啥?
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
┌──(jungle㉿LAPTOP-A7S3TAA4)-[/mnt/d/work for vscode/chapter12]
└─$ strace pmap 1431
execve("/usr/bin/pmap", ["pmap", "1431"], 0x7ffe115f8938 /* 41 vars */) = 0
brk(NULL)                               = 0x55d0cc7da000
arch_prctl(0x3001 /* ARCH_??? */, 0x7ffc6ce9b2d0) = -1 EINVAL (Invalid argument)
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca614a000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=46367, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 46367, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca613e000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libprocps.so.8", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\0\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=80080, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 225320, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca6106000
mmap(0x7f4ca610a000, 40960, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f4ca610a000
mmap(0x7f4ca6114000, 16384, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xe000) = 0x7f4ca6114000
mmap(0x7f4ca6118000, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x11000) = 0x7f4ca6118000
mmap(0x7f4ca611b000, 139304, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4ca611b000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0P\237\2\0\0\0\0\0"..., 832) = 832
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
pread64(3, "\4\0\0\0 \0\0\0\5\0\0\0GNU\0\2\0\0\300\4\0\0\0\3\0\0\0\0\0\0\0"..., 48, 848) = 48
pread64(3, "\4\0\0\0\24\0\0\0\3\0\0\0GNU\0i8\235HZ\227\223\333\350s\360\352,\223\340."..., 68, 896) = 68
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=2216304, ...}, AT_EMPTY_PATH) = 0
pread64(3, "\6\0\0\0\4\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0@\0\0\0\0\0\0\0"..., 784, 64) = 784
mmap(NULL, 2260560, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5ede000
mmap(0x7f4ca5f06000, 1658880, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7f4ca5f06000
mmap(0x7f4ca609b000, 360448, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1bd000) = 0x7f4ca609b000
mmap(0x7f4ca60f3000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x214000) = 0x7f4ca60f3000
mmap(0x7f4ca60f9000, 52816, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4ca60f9000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libsystemd.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\0\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=807936, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 812384, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5e17000
mprotect(0x7f4ca5e2a000, 700416, PROT_NONE) = 0
mmap(0x7f4ca5e2a000, 520192, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x13000) = 0x7f4ca5e2a000
mmap(0x7f4ca5ea9000, 176128, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x92000) = 0x7f4ca5ea9000
mmap(0x7f4ca5ed5000, 32768, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xbd000) = 0x7f4ca5ed5000
mmap(0x7f4ca5edd000, 1376, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5edd000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/liblzma.so.5", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\0\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=170456, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 172296, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5dec000
mmap(0x7f4ca5def000, 110592, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f4ca5def000
mmap(0x7f4ca5e0a000, 45056, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e000) = 0x7f4ca5e0a000
mmap(0x7f4ca5e15000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x28000) = 0x7f4ca5e15000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libzstd.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\0\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=841808, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 843832, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5d1d000
mmap(0x7f4ca5d27000, 729088, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xa000) = 0x7f4ca5d27000
mmap(0x7f4ca5dd9000, 69632, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xbc000) = 0x7f4ca5dd9000
mmap(0x7f4ca5dea000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xcc000) = 0x7f4ca5dea000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/liblz4.so.1", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\0\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=125152, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5d1b000
mmap(NULL, 127072, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5cfb000
mprotect(0x7f4ca5cfd000, 114688, PROT_NONE) = 0
mmap(0x7f4ca5cfd000, 102400, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x2000) = 0x7f4ca5cfd000
mmap(0x7f4ca5d16000, 8192, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b000) = 0x7f4ca5d16000
mmap(0x7f4ca5d19000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1d000) = 0x7f4ca5d19000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libcap.so.2", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\0\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=39024, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 41016, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5cf0000
mmap(0x7f4ca5cf3000, 16384, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x3000) = 0x7f4ca5cf3000
mmap(0x7f4ca5cf7000, 8192, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x7000) = 0x7f4ca5cf7000
mmap(0x7f4ca5cf9000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x8000) = 0x7f4ca5cf9000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libgcrypt.so.20", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\0\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=1296312, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 1299576, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5bb2000
mprotect(0x7f4ca5bc1000, 1200128, PROT_NONE) = 0
mmap(0x7f4ca5bc1000, 942080, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xf000) = 0x7f4ca5bc1000
mmap(0x7f4ca5ca7000, 253952, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0xf5000) = 0x7f4ca5ca7000
mmap(0x7f4ca5ce6000, 36864, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x133000) = 0x7f4ca5ce6000
mmap(0x7f4ca5cef000, 1144, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5cef000
close(3)                                = 0
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libgpg-error.so.0", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\0\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\0\0\0\0\0\0\0\0"..., 832) = 832
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=149760, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 151992, PROT_READ, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f4ca5b8c000
mmap(0x7f4ca5b90000, 90112, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x4000) = 0x7f4ca5b90000
mmap(0x7f4ca5ba6000, 40960, PROT_READ, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1a000) = 0x7f4ca5ba6000
mmap(0x7f4ca5bb0000, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x23000) = 0x7f4ca5bb0000
close(3)                                = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5b8a000
mmap(NULL, 12288, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca5b87000
arch_prctl(ARCH_SET_FS, 0x7f4ca5b877c0) = 0
set_tid_address(0x7f4ca5b87a90)         = 1646
set_robust_list(0x7f4ca5b87aa0, 24)     = 0
rseq(0x7f4ca5b88160, 0x20, 0, 0x53053053) = 0
mprotect(0x7f4ca60f3000, 16384, PROT_READ) = 0
mprotect(0x7f4ca5bb0000, 4096, PROT_READ) = 0
mprotect(0x7f4ca5ce6000, 12288, PROT_READ) = 0
mprotect(0x7f4ca5cf9000, 4096, PROT_READ) = 0
mprotect(0x7f4ca5d19000, 4096, PROT_READ) = 0
mprotect(0x7f4ca5dea000, 4096, PROT_READ) = 0
mprotect(0x7f4ca5e15000, 4096, PROT_READ) = 0
mprotect(0x7f4ca5ed5000, 28672, PROT_READ) = 0
mprotect(0x7f4ca6118000, 8192, PROT_READ) = 0
mprotect(0x55d0cbbd6000, 4096, PROT_READ) = 0
mprotect(0x7f4ca6184000, 8192, PROT_READ) = 0
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0
munmap(0x7f4ca613e000, 46367)           = 0
getrandom("\xc0\x5f\x86\x25\xbb\x1a\x8a\xe5", 8, GRND_NONBLOCK) = 8
brk(NULL)                               = 0x55d0cc7da000
brk(0x55d0cc7fb000)                     = 0x55d0cc7fb000
prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1
prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? */) = -1 EINVAL (Invalid argument)
prctl(PR_CAPBSET_READ, CAP_CHECKPOINT_RESTORE) = 1
prctl(PR_CAPBSET_READ, 0x2c /* CAP_??? */) = -1 EINVAL (Invalid argument)
prctl(PR_CAPBSET_READ, 0x2a /* CAP_??? */) = -1 EINVAL (Invalid argument)
prctl(PR_CAPBSET_READ, 0x29 /* CAP_??? */) = -1 EINVAL (Invalid argument)
openat(AT_FDCWD, "/proc/self/auxv", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0400, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(3, "!\0\0\0\0\0\0\0\0\200\364l\374\177\0\0003\0\0\0\0\0\0\0\360\6\0\0\0\0\0\0"..., 1024) = 336
close(3)                                = 0
openat(AT_FDCWD, "/proc/sys/kernel/osrelease", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(3, "5.15.90.1-microsoft-standard-WSL"..., 1024) = 34
close(3)                                = 0
openat(AT_FDCWD, "/sys/devices/system/cpu/online", O_RDONLY|O_CLOEXEC) = 3
read(3, "0-15\n", 1024)                 = 5
close(3)                                = 0
openat(AT_FDCWD, "/proc/self/auxv", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0400, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(3, "!\0\0\0\0\0\0\0\0\200\364l\374\177\0\0003\0\0\0\0\0\0\0\360\6\0\0\0\0\0\0"..., 1024) = 336
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=6213280, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 6213280, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca559a000
close(3)                                = 0
openat(AT_FDCWD, "/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=2996, ...}, AT_EMPTY_PATH) = 0
read(3, "# Locale name alias data base.\n#"..., 4096) = 2996
read(3, "", 4096)                       = 0
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_IDENTIFICATION", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_IDENTIFICATION", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=258, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 258, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca6183000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/x86_64-linux-gnu/gconv/gconv-modules.cache", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=27002, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 27002, PROT_READ, MAP_SHARED, 3, 0) = 0x7f4ca6143000
close(3)                                = 0
futex(0x7f4ca60f8a6c, FUTEX_WAKE_PRIVATE, 2147483647) = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_MEASUREMENT", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_MEASUREMENT", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=23, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 23, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca6142000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_TELEPHONE", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_TELEPHONE", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=47, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 47, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca6141000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_ADDRESS", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_ADDRESS", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=127, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 127, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca6140000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_NAME", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_NAME", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=62, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 62, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca613f000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_PAPER", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_PAPER", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=34, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 34, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca613e000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_MESSAGES", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_MESSAGES", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFDIR|0755, st_size=4096, ...}, AT_EMPTY_PATH) = 0
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_MESSAGES/SYS_LC_MESSAGES", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=48, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 48, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca5599000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_MONETARY", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_MONETARY", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=270, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 270, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca5598000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_COLLATE", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_COLLATE", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=1406, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 1406, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca5597000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_TIME", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_TIME", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=3360, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 3360, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca5596000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_NUMERIC", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_NUMERIC", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=50, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 50, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca5595000
close(3)                                = 0
openat(AT_FDCWD, "/usr/lib/locale/C.UTF-8/LC_CTYPE", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/locale/C.utf8/LC_CTYPE", O_RDONLY|O_CLOEXEC) = 3
newfstatat(3, "", {st_mode=S_IFREG|0644, st_size=353616, ...}, AT_EMPTY_PATH) = 0
mmap(NULL, 353616, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f4ca553e000
close(3)                                = 0
openat(AT_FDCWD, "/usr/share/locale/C.UTF-8/LC_MESSAGES/procps-ng.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale/C.utf8/LC_MESSAGES/procps-ng.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale/C/LC_MESSAGES/procps-ng.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale-langpack/C.UTF-8/LC_MESSAGES/procps-ng.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale-langpack/C.utf8/LC_MESSAGES/procps-ng.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale-langpack/C/LC_MESSAGES/procps-ng.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/proc/self/maps", O_RDONLY) = 3
dup3(3, 0, 0)                           = 0
close(3)                                = 0
shmget(IPC_PRIVATE, 42, IPC_CREAT|0666) = 1
shmat(1, NULL, SHM_RDONLY)              = 0x7f4ca553d000
newfstatat(0, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(0, "55d0cbbce000-55d0cbbd0000 r--p 0"..., 1024) = 1024
shmdt(0x7f4ca553d000)                   = 0
shmctl(1, IPC_RMID, NULL)               = 0
newfstatat(AT_FDCWD, "/proc/self/task", {st_mode=S_IFDIR|0555, st_size=0, ...}, 0) = 0
mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca551d000
mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f4ca54fc000
newfstatat(AT_FDCWD, "/proc/1431", {st_mode=S_IFDIR|0555, st_size=0, ...}, 0) = 0
openat(AT_FDCWD, "/proc/1431/stat", O_RDONLY) = 3
read(3, "1431 (minimal) t 1398 1431 339 3"..., 1024) = 272
close(3)                                = 0
openat(AT_FDCWD, "/proc/1431/cmdline", O_RDONLY) = 3
read(3, "/mnt/d/work for vscode/chapter12"..., 2047) = 41
close(3)                                = 0
newfstatat(1, "", {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x4), ...}, AT_EMPTY_PATH) = 0
write(1, "1431:   /mnt/d/work for vscode/c"..., 491431:   /mnt/d/work for vscode/chapter12/minimal
) = 49
openat(AT_FDCWD, "/proc/1431/maps", O_RDONLY) = 3
newfstatat(3, "", {st_mode=S_IFREG|0444, st_size=0, ...}, AT_EMPTY_PATH) = 0
read(3, "00400000-00401000 r--p 00000000 "..., 1024) = 469
write(1, "0000000000400000      4K r---- m"..., 390000000000400000      4K r---- minimal
) = 39
write(1, "0000000000401000      4K r-x-- m"..., 390000000000401000      4K r-x-- minimal
) = 39
write(1, "00007ffff7ff9000     16K r----  "..., 4200007ffff7ff9000     16K r----   [ anon ]
) = 42
write(1, "00007ffff7ffd000      8K r-x--  "..., 4200007ffff7ffd000      8K r-x--   [ anon ]
) = 42
write(1, "00007ffffffdd000    136K rw---  "..., 4300007ffffffdd000    136K rw---   [ stack ]
) = 43
read(3, "", 1024)                       = 0
close(3)                                = 0
write(1, " total              168K\n", 25 total              168K
) = 25
close(1)                                = 0
close(2)                                = 0
lseek(0, -412, SEEK_CUR)                = 612
exit_group(0)                           = ?
+++ exited with 0 +++
  • 关键信息openat(AT_FDCWD, "/proc/1431/maps", O_RDONLY) = 3

  • ???

  • cat一下看看里面是啥

1
2
3
4
5
6
7
┌──(jungle㉿LAPTOP-A7S3TAA4)-[/mnt/d/work for vscode/chapter12]
└─$ cat /proc/1431/maps 
00400000-00401000 r--p 00000000 00:4a 3377699720805767                   /mnt/d/work for vscode/chapter12/minimal
00401000-00402000 r-xp 00001000 00:4a 3377699720805767                   /mnt/d/work for vscode/chapter12/minimal
7ffff7ff9000-7ffff7ffd000 r--p 00000000 00:00 0                          [vvar]
7ffff7ffd000-7ffff7fff000 r-xp 00000000 00:00 0                          [vdso]
7ffffffdd000-7ffffffff000 rw-p 00000000 00:00 0                          [stack]
  • 使用man 5 proc多阅读

操作系统提供查看进程地址空间的机制

1
// Empty program: it does nothing, so whatever appears in its address space
// was put there by the loader and the linked runtime, not by user code.
int main() {}
1
gcc -static deom.c -o deom
  • 查看maps
1
2
3
4
5
6
7
8
9
$ cat /proc/2017/maps
00400000-00401000 r--p 00000000 00:4a 1688849860573486                   /mnt/d/work for vscode/chapter12/deom
00401000-00498000 r-xp 00001000 00:4a 1688849860573486                   /mnt/d/work for vscode/chapter12/deom
00498000-004c1000 r--p 00098000 00:4a 1688849860573486                   /mnt/d/work for vscode/chapter12/deom
004c1000-004c8000 rw-p 000c0000 00:4a 1688849860573486                   /mnt/d/work for vscode/chapter12/deom
004c8000-004cd000 rw-p 00000000 00:00 0                                  [heap]
7ffff7ff9000-7ffff7ffd000 r--p 00000000 00:00 0                          [vvar]
7ffff7ffd000-7ffff7fff000 r-xp 00000000 00:00 0                          [vdso]
7ffffffdd000-7ffffffff000 rw-p 00000000 00:00 0                          [stack]
  • readelf
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
$ readelf deom -l

Elf file type is EXEC (Executable file)
Entry point 0x401620
There are 10 program headers, starting at offset 64

Program Headers:
  Type           Offset             VirtAddr           PhysAddr
                 FileSiz            MemSiz              Flags  Align
  LOAD           0x0000000000000000 0x0000000000400000 0x0000000000400000
                 0x0000000000000528 0x0000000000000528  R      0x1000
  LOAD           0x0000000000001000 0x0000000000401000 0x0000000000401000
                 0x00000000000964bd 0x00000000000964bd  R E    0x1000
  LOAD           0x0000000000098000 0x0000000000498000 0x0000000000498000
                 0x0000000000028476 0x0000000000028476  R      0x1000
  LOAD           0x00000000000c07b0 0x00000000004c17b0 0x00000000004c17b0
                 0x0000000000005ae0 0x000000000000b490  RW     0x1000
  NOTE           0x0000000000000270 0x0000000000400270 0x0000000000400270
                 0x0000000000000030 0x0000000000000030  R      0x8
  NOTE           0x00000000000002a0 0x00000000004002a0 0x00000000004002a0
                 0x0000000000000044 0x0000000000000044  R      0x4
  TLS            0x00000000000c07b0 0x00000000004c17b0 0x00000000004c17b0
                 0x0000000000000020 0x0000000000000068  R      0x8
  GNU_PROPERTY   0x0000000000000270 0x0000000000400270 0x0000000000400270
                 0x0000000000000030 0x0000000000000030  R      0x8
  GNU_STACK      0x0000000000000000 0x0000000000000000 0x0000000000000000
                 0x0000000000000000 0x0000000000000000  RW     0x10
  GNU_RELRO      0x00000000000c07b0 0x00000000004c17b0 0x00000000004c17b0
                 0x0000000000003850 0x0000000000003850  R      0x1

 Section to Segment mapping:
  Segment Sections...
   00     .note.gnu.property .note.gnu.build-id .note.ABI-tag .rela.plt 
   01     .init .plt .text __libc_freeres_fn .fini 
   02     .rodata .stapsdt.base .eh_frame .gcc_except_table 
   03     .tdata .init_array .fini_array .data.rel.ro .got .got.plt .data __libc_subfreeres __libc_IO_vtables __libc_atexit .bss __libc_freeres_ptrs 
   04     .note.gnu.property 
   05     .note.gnu.build-id .note.ABI-tag 
   06     .tdata .tbss 
   07     .note.gnu.property 
   08     
   09     .tdata .init_array .fini_array .data.rel.ro .got 
  • 动态链接
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
$ cat /proc/2319/maps 
555555554000-555555555000 r--p 00000000 00:4a 1970324837284142           /mnt/d/work for vscode/chapter12/deom
555555555000-555555556000 r-xp 00001000 00:4a 1970324837284142           /mnt/d/work for vscode/chapter12/deom
555555556000-555555557000 r--p 00002000 00:4a 1970324837284142           /mnt/d/work for vscode/chapter12/deom
555555557000-555555559000 rw-p 00002000 00:4a 1970324837284142           /mnt/d/work for vscode/chapter12/deom
7ffff7fbd000-7ffff7fc1000 r--p 00000000 00:00 0                          [vvar]
7ffff7fc1000-7ffff7fc3000 r-xp 00000000 00:00 0                          [vdso]
7ffff7fc3000-7ffff7fc5000 r--p 00000000 08:20 6303                       /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7ffff7fc5000-7ffff7fef000 r-xp 00002000 08:20 6303                       /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7ffff7fef000-7ffff7ffa000 r--p 0002c000 08:20 6303                       /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7ffff7ffb000-7ffff7fff000 rw-p 00037000 08:20 6303                       /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2
7ffffffdd000-7ffffffff000 rw-p 00000000 00:00 0                          [stack]                                                                                
  • 与静态链接相比,多了动态链接器 ld-linux-x86-64.so.2 的若干映射段

RTFM: /proc/[pid]/maps (man 5 proc)

  • 进程地址空间中的每一段

    • 地址 (范围) 和权限 (rwxsp)
    • 对应的文件: offset, dev, inode, pathname
      • TFM 里有更详细的解释
  • 和 readelf (-l) 里的信息互相验证

    • 课后习题:定义一些代码/数据,观察变化
1
2
3
4
5
6
address           perms offset   dev   inode      pathname
00400000-00401000 r--p  00000000 fd:00 525733     a.out
00401000-00495000 r-xp  00001000 fd:00 525733     a.out
00495000-004bc000 r--p  00095000 fd:00 525733     a.out
004bd000-004c3000 rw-p  000bc000 fd:00 525733     a.out
004c3000-004c4000 rw-p  00000000 00:00 0          [heap]

更完整的地址空间映象

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
0000555555554000 r--p     a.out
0000555555555000 r-xp     a.out
0000555555556000 r--p     a.out
0000555555557000 r--p     a.out
0000555555558000 rw-p     a.out
00007ffff7dc1000 r--p     libc-2.31.so
00007ffff7de3000 r-xp     libc-2.31.so
00007ffff7f5b000 r--p     libc-2.31.so
00007ffff7fa9000 r--p     libc-2.31.so
00007ffff7fad000 rw-p     libc-2.31.so
00007ffff7faf000 rw-p     (这是什么?)
00007ffff7fcb000 r--p     [vvar] (这又是什么?)
00007ffff7fce000 r-xp     [vdso] (这叒是什么?)
00007ffff7fcf000 r--p     (省略相似的 ld-2.31.so)
00007ffffffde000 rw-p     [stack]
ffffffffff600000 --xp     [vsyscall] (这叕是什么?)
  • 是不是 bss? 给我们的代码加一个大数组试试!

RTFM (5 proc): 我们发现的宝藏

vdso (7): Virtual system calls: 只读的系统调用也许可以不陷入内核执行。

  • 无需陷入内核的系统调用
    • 例子: time (2)
      • 直接调试 vdso.c
      • 时间:内核维护秒级的时间 (所有进程映射同一个页面)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
#include <stdio.h>
#include <sys/time.h>
#include <time.h>
#include <unistd.h>

// Returns the current wall-clock time in seconds as a double, combining the
// whole seconds and the microsecond part reported by gettimeofday().
double gettime() {
  struct timeval now;
  gettimeofday(&now, NULL); // trapless system call
  double micros_as_seconds = now.tv_usec / 1000000.0;
  return now.tv_sec + micros_as_seconds;
}

// Prints the current Unix timestamp, then measures how long sleep(1) takes
// by sampling gettime() before and after it and printing the difference.
int main() {
  // time_t is not guaranteed to be long, so convert explicitly to match %ld.
  printf("Time stamp: %ld\n", (long)time(NULL)); // trapless system call
  double st = gettime();
  sleep(1);
  double ed = gettime();
  printf("Time: %.6lfs\n", ed - st);
}
  • 例子: gettimeofday (2)
    • RTFSC (非常聪明的实现)
  • 更多的例子:RTFM
    • 计算机系统里没有魔法!我们理解了进程地址空间的全部
/img/Operating System/chapter12-4.png
time address
/img/Operating System/chapter12-5.png
vdso
  • time 的地址落在 [vdso] 段内($time\ address \in vdso$),而不在 libc 的映射范围内
/img/Operating System/chapter12-6.jpg
实现方式
  • 源码
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#ifdef CONFIG_TIME_NS
/*
 * do_hres_timens - CONFIG_TIME_NS variant of the clock read; fills *ts for
 * clock @clk and applies the per-clock offset found in @vdns.
 *
 * @vdns: vdso data carrying the offset[] table (indexed by clock id).
 * @clk:  clock id; CLOCK_MONOTONIC_RAW selects the CS_RAW entry, every other
 *        id selects CS_HRES_COARSE.
 * @ts:   output; tv_sec and tv_nsec are written on success.
 *
 * Returns 0 on success, or -1 when vdso_clocksource_ok() or vdso_cycles_ok()
 * rejects the current state (in that case *ts is not written).
 */
static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
					  struct __kernel_timespec *ts)
{
	const struct vdso_data *vd;
	/* Offset for this clock; added to the result after the read loop. */
	const struct timens_offset *offs = &vdns->offset[clk];
	const struct vdso_timestamp *vdso_ts;
	u64 cycles, last, ns;
	u32 seq;
	s64 sec;

	/*
	 * Rewind @vdns by the clock-source index, hand the result to the arch
	 * hook, then re-apply the same index on the pointer it returns.
	 * NOTE(review): presumably the hook maps the namespace page to the
	 * real vdso data array - confirm against the arch implementation.
	 */
	vd = vdns - (clk == CLOCK_MONOTONIC_RAW ? CS_RAW : CS_HRES_COARSE);
	vd = __arch_get_timens_vdso_data(vd);
	if (clk != CLOCK_MONOTONIC_RAW)
		vd = &vd[CS_HRES_COARSE];
	else
		vd = &vd[CS_RAW];
	vdso_ts = &vd->basetime[clk];

	/*
	 * The reads are bracketed by vdso_read_begin()/vdso_read_retry() and
	 * repeated for as long as vdso_read_retry() returns true (presumably
	 * because vd was updated concurrently - see the helper definitions).
	 */
	do {
		seq = vdso_read_begin(vd);

		if (unlikely(!vdso_clocksource_ok(vd)))
			return -1;

		cycles = __arch_get_hw_counter(vd->clock_mode, vd);
		if (unlikely(!vdso_cycles_ok(cycles)))
			return -1;
		/* Base nanoseconds plus the delta since cycle_last, then shifted. */
		ns = vdso_ts->nsec;
		last = vd->cycle_last;
		ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
		ns = vdso_shift_ns(ns, vd->shift);
		sec = vdso_ts->sec;
	} while (unlikely(vdso_read_retry(vd, seq)));

	/* Add the namespace offset */
	sec += offs->sec;
	ns += offs->nsec;

	/*
	 * Do this outside the loop: a race inside the loop could result
	 * in __iter_div_u64_rem() being extremely slow.
	 */
	/* Whole seconds contained in ns move to tv_sec; the remainder stays in ns. */
	ts->tv_sec = sec + __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns);
	ts->tv_nsec = ns;

	return 0;
}
#else
/*
 * Stub compiled in the #else branch of CONFIG_TIME_NS: ignores @vd and
 * always returns NULL.
 */
static __always_inline
const struct vdso_data *__arch_get_timens_vdso_data(const struct vdso_data *vd)
{
	return NULL;
}

/*
 * Stub compiled in the #else branch of CONFIG_TIME_NS: performs no clock
 * read, leaves *ts untouched and always returns -EINVAL.
 */
static __always_inline int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
					  struct __kernel_timespec *ts)
{
	return -EINVAL;
}
#endif

(小知识) 系统调用的实现

“执行系统调用时,进程陷入内核态执行”——不,不是的。

  • 系统调用就是一组接口的约定,谁说一定要 int 指令?

    • 光一条指令就要保存 ss, rsp, cs, rip, rflags (40 字节) 到内存
  • SYSCALL — Fast System Call

1
2
3
4
5
6
7
RCX    <- RIP; (* 下条指令执行的地址 *)
RIP    <- IA32_LSTAR;
R11    <- RFLAGS;
RFLAGS <- RFLAGS & ~(IA32_FMASK);
CPL    <- 0; (* 进入 Ring 0 执行 *)
CS.Selector <- IA32_STAR[47:32] & 0xFFFC
SS.Selector <- IA32_STAR[47:32] + 8;

(小知识) 系统调用的实现 (cont’d)

  • 能不能让其他系统调用也不 trap 进入内核?

  • 使用共享内存和内核通信!

    • 内核线程在 spinning 等待系统调用的到来

    • 收到系统调用请求后立即开始执行

    • 进程 spin 等待系统调用完成

    • 如果系统调用很多,可以打包处理

进程的地址空间管理

Execve 之后……

  • 进程只有少量内存映射

    • 静态链接:代码、数据、堆栈、堆区

    • 动态链接:代码、数据、堆栈、堆区、INTERP (ld.so)

  • 地址空间里剩下的部分是怎么创建的?

    • libc.so 都没有啊……

    • 创建了以后,我们还能修改它吗?

      • 肯定是能的:动态链接库可以动态加载 (M4)
        • 当然是通过系统调用了
/img/Operating System/chapter12-7.png
/img/Operating System/chapter12-8.png
  • 没有进入main的时候,看不到libc
/img/Operating System/chapter12-9.png
/img/Operating System/chapter12-10.png
  • 执行到main的时候,libc被加载进来了

进程的地址空间 (cont’d)

  • 进程的地址空间 = 内存里若干连续的 “段”

    • 每一段是可访问 (读/写/执行) 的内存
      • 可能映射到某个文件和/或在进程间共享
  • 管理进程地址空间的系统调用

1
2
3
4
5
6
// 映射
void *mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset);
int munmap(void *addr, size_t length);

// 修改映射权限
int mprotect(void *addr, size_t length, int prot);
  • RTFM
    • 说人话:状态上增加/删除/修改一段可访问的内存
    • 也就是上面的示例,内存改变全靠mmap

把文件映射到进程地址空间?

  • 它们的确好像没有什么区别

    • 文件 = 字节序列

    • 内存 = 字节序列

    • 操作系统允许映射好像挺合理的……

      • 带来了很大的方便
        • ELF loader 用 mmap 非常容易实现
          • 解析出要加载哪部分到内存,直接mmap就完了
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
$ readelf -l deom

Elf file type is DYN (Position-Independent Executable file)
Entry point 0x1040
There are 13 program headers, starting at offset 64

Program Headers:
  Type           Offset             VirtAddr           PhysAddr
                 FileSiz            MemSiz              Flags  Align
  PHDR           0x0000000000000040 0x0000000000000040 0x0000000000000040
                 0x00000000000002d8 0x00000000000002d8  R      0x8
  INTERP         0x0000000000000318 0x0000000000000318 0x0000000000000318
                 0x000000000000001c 0x000000000000001c  R      0x1
      [Requesting program interpreter: /lib64/ld-linux-x86-64.so.2]
  LOAD           0x0000000000000000 0x0000000000000000 0x0000000000000000
                 0x00000000000005f0 0x00000000000005f0  R      0x1000
  LOAD           0x0000000000001000 0x0000000000001000 0x0000000000001000
                 0x0000000000000145 0x0000000000000145  R E    0x1000
  LOAD           0x0000000000002000 0x0000000000002000 0x0000000000002000
                 0x00000000000000c4 0x00000000000000c4  R      0x1000
  LOAD           0x0000000000002df0 0x0000000000003df0 0x0000000000003df0
                 0x0000000000000220 0x0000000000000228  RW     0x1000
  DYNAMIC        0x0000000000002e00 0x0000000000003e00 0x0000000000003e00
                 0x00000000000001c0 0x00000000000001c0  RW     0x8
  NOTE           0x0000000000000338 0x0000000000000338 0x0000000000000338
                 0x0000000000000030 0x0000000000000030  R      0x8
  NOTE           0x0000000000000368 0x0000000000000368 0x0000000000000368
                 0x0000000000000044 0x0000000000000044  R      0x4
  GNU_PROPERTY   0x0000000000000338 0x0000000000000338 0x0000000000000338
                 0x0000000000000030 0x0000000000000030  R      0x8
  GNU_EH_FRAME   0x0000000000002004 0x0000000000002004 0x0000000000002004
                 0x000000000000002c 0x000000000000002c  R      0x4
  GNU_STACK      0x0000000000000000 0x0000000000000000 0x0000000000000000
                 0x0000000000000000 0x0000000000000000  RW     0x10
  GNU_RELRO      0x0000000000002df0 0x0000000000003df0 0x0000000000003df0
                 0x0000000000000210 0x0000000000000210  R      0x1

 Section to Segment mapping:
  Segment Sections...
   00     
   01     .interp 
   02     .interp .note.gnu.property .note.gnu.build-id .note.ABI-tag .gnu.hash .dynsym .dynstr .gnu.version .gnu.version_r .rela.dyn 
   03     .init .plt .plt.got .text .fini 
   04     .rodata .eh_frame_hdr .eh_frame 
   05     .init_array .fini_array .dynamic .got .data .bss 
   06     .dynamic 
   07     .note.gnu.property 
   08     .note.gnu.build-id .note.ABI-tag 
   09     .note.gnu.property 
   10     .eh_frame_hdr 
   11     
   12     .init_array .fini_array .dynamic .got 
  • readelf 显示的 program headers 描述了该进程载入时要把文件的哪一部分加载到哪个地址,OS 通过 mmap 进行加载

使用 Memory Mapping

Example 1:

  • 用 mmap 申请大量内存空间(mmap-alloc.c)
    • 瞬间完成
    • 不妨strace/gdb看一下
    • libc 的 malloc/free 在初始空间用完后使用 sbrk/mmap 申请空间
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
#include <unistd.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>

// Postfix unit: "3 GiB" expands to "3 * (1024LL * 1024 * 1024)".
#define GiB * (1024LL * 1024 * 1024)

/*
 * Demo: reserve 3 GiB of anonymous private memory with a single mmap() call,
 * then write and read back one int at the 1 GiB and 2 GiB offsets.
 * Exits with status 1 if the mapping cannot be created.
 */
int main() {
  volatile uint8_t *p = mmap(NULL, 3 GiB, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
  // Test for failure first: perror() must see mmap's errno before any other
  // library call (such as printf) has a chance to overwrite it.
  if (p == MAP_FAILED) {
    perror("cannot map");
    exit(1);
  }
  printf("mmap: %lx\n", (unsigned long)(uintptr_t)p);

  // Keep the volatile qualifier on the accesses instead of casting it away.
  volatile int *at_1g = (volatile int *)(p + 1 GiB);
  volatile int *at_2g = (volatile int *)(p + 2 GiB);
  *at_1g = 114;
  *at_2g = 514;
  printf("Read get: %d\n", *at_1g);
  printf("Read get: %d\n", *at_2g);
  return 0;
}
1
2
3
4
5
6
7
8
9
$ time ./mmap-alloc
mmap: 7f091f6c8000
Read get: 114
Read get: 514

real    0.01s
user    0.00s
sys     0.00s
cpu     47%
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
strace -T ./mmap-alloc
execve("./mmap-alloc", ["./mmap-alloc"], 0x7ffdbcd061d8 /* 41 vars */) = 0 <0.005476>
arch_prctl(0x3001 /* ARCH_??? */, 0x7fff0b4c4fc0) = -1 EINVAL (Invalid argument) <0.000046>
brk(NULL)                               = 0x1fcd000 <0.000034>
brk(0x1fcddc0)                          = 0x1fcddc0 <0.000031>
arch_prctl(ARCH_SET_FS, 0x1fcd3c0)      = 0 <0.000027>
set_tid_address(0x1fcd690)              = 4689 <0.000037>
set_robust_list(0x1fcd6a0, 24)          = 0 <0.000027>
rseq(0x1fcdd60, 0x20, 0, 0x53053053)    = 0 <0.000024>
uname({sysname="Linux", nodename="LAPTOP-A7S3TAA4", ...}) = 0 <0.000026>
prlimit64(0, RLIMIT_STACK, NULL, {rlim_cur=8192*1024, rlim_max=RLIM64_INFINITY}) = 0 <0.000038>
readlink("/proc/self/exe", "/mnt/d/work for vscode/chapter12"..., 4096) = 43 <0.000143>
getrandom("\x2d\xdd\xc2\x27\x6a\x77\x7d\x7f", 8, GRND_NONBLOCK) = 8 <0.000028>
brk(0x1feedc0)                          = 0x1feedc0 <0.000043>
brk(0x1fef000)                          = 0x1fef000 <0.000047>
mprotect(0x4c1000, 16384, PROT_READ)    = 0 <0.000043>
mmap(NULL, 3221225472, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f1dcee47000 <0.000041>
newfstatat(1, "", {st_mode=S_IFCHR|0620, st_rdev=makedev(0x88, 0x4), ...}, AT_EMPTY_PATH) = 0 <0.000048>
write(1, "mmap: 7f1dcee47000\n", 19mmap: 7f1dcee47000
)    = 19 <0.000067>
write(1, "Read get: 114\n", 14Read get: 114
)         = 14 <0.000050>
write(1, "Read get: 514\n", 14Read get: 514
)         = 14 <0.000046>
exit_group(0)                           = ?
+++ exited with 0 +++
  • mmap只花了0.000041s!

Example 2:

1
2
3
4
5
6
7
#!/usr/bin/env python3
"""Hex-dump the first 512 bytes of /dev/sda through a read-only memory mapping."""

import mmap, hexdump

DISK = '/dev/sda'
WINDOW = 128 << 30  # number of bytes requested from mmap

with open(DISK, 'rb') as disk:
    # Map the device read-only, then slice the first sector-sized chunk.
    view = mmap.mmap(disk.fileno(), WINDOW, prot=mmap.PROT_READ)
    first_bytes = view[:512]
    hexdump.hexdump(first_bytes)

Memory-Mapped File: 一致性

  • 但我们好像带来了一些问题……
    • 如果把页面映射到文件
      • 修改什么时候生效?
        • 立即生效:那会造成巨大量的磁盘 I/O
        • unmap (进程终止) 时生效:好像又太迟了……
      • 若干个映射到同一个文件的进程?
        • 共享一份内存?
        • 各自有本地的副本?

  • 请查阅手册,看看操作系统是如何规定这些操作的行为的

    • 例如阅读 msync (2)

    • 这才是操作系统真正的复杂性

地址空间的隔离

地址空间:实现进程隔离

  • 每个 *ptr 都只能访问本进程 (状态机) 的内存

    • 除非 mmap 显式指定、映射共享文件或共享内存多线程

    • 实现了操作系统最重要的功能:进程之间的隔离


  • 任何一个程序都不能因为 bug 或恶意行为侵犯其他程序执行

    • “连方法都没有”

    • 吗……?

电子游戏的上一个黄金时代

  • 电子竞技的先行者:“即时战略游戏” (Real-Time Strategy)
    • Command and Conquer (Westwood), Starcraft (Blizzard), …
      • 如果我们想 “侵犯” 游戏的执行……呢?

前互联网时代的神器 (1): 金山游侠

  • 在进程的内存中找到代表 “金钱”、“生命” 的重要属性并且改掉

  • 只要有访问其他进程内存和在程序上 “悬浮显示” 的 API 即可

    • 想象成是另一个进程内存的 “调试器”

    • 在 Linux 中可以轻松拥有:dosbox-hack.c

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <stdint.h>
#include <sys/mman.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdbool.h>

// Element count of a real array (gives a wrong answer when handed a pointer).
#define LENGTH(arr)  (sizeof(arr) / sizeof(arr[0]))

// n:   number of entries of found[] in use
// fd:  descriptor main() opens on /proc/<pid>/mem
// pid: process id main() reads from `pidof dosbox`
int n, fd, pid;
// Candidate addresses recorded by scan(); scan() zeroes an entry to drop it.
uint64_t found[4096];
// When true, the next scan() sets n back to 0 and starts a new search.
bool reset;

void scan(uint16_t val) {
  uintptr_t start, kb;
  char perm[16];
  FILE *fp = popen("pmap -x $(pidof dosbox) | tail -n +3", "r"); assert(fp);

  if (reset) n = 0;
  while (fscanf(fp, "%lx", &start) == 1 && (intptr_t)start > 0) {
    assert(fscanf(fp, "%ld%*ld%*ld%s%*[^\n]s", &kb, perm) >= 1);
    if (perm[1] != 'w') continue;

    uintptr_t size = kb * 1024;
    char *mem = malloc(size); assert(mem);
    assert(lseek(fd, start, SEEK_SET) != (off_t)-1);
    assert(read(fd, mem, size) == size);
    for (int i = 0; i < size; i += 2) {
      uint16_t v = *(uint16_t *)(&mem[i]);
      if (reset) {
        if (val == v && n < LENGTH(found)) found[n++] = start + i;
      } else {
        for (int j = 0; j < n; j++) {
	  if (found[j] == start + i && v != val) found[j] = 0;
	}
      }
    }
    free(mem);
  }
  pclose(fp);

  int s = 0;
  for (int i = 0; i < n; i++) {
    if (found[i] != 0) s++;
  }
  reset = false;
  printf("There are %d match(es).\n", s);
}

void overwrite(uint16_t val) {
  int s = 0;
  for (int i = 0; i < n; i++)
    if (found[i] != 0) {
      assert(lseek(fd, found[i], SEEK_SET) != (off_t)-1);
      write(fd, &val, 2);
      s++;
    }
  printf("%d value(s) written.\n", s);
}

/*
 * Interactive front end. Finds the dosbox pid, opens /proc/<pid>/mem
 * read-write, then reads commands from stdin:
 *
 *   s <n>  scan for the value n        w <n>  overwrite the matches with n
 *   r      start a new search          q      quit
 *
 * Exits with status 1 when the pid cannot be obtained or the memory file
 * cannot be opened, and with status 0 on 'q' or end of input.
 */
int main() {
  char buf[32];
  setbuf(stdout, NULL);

  FILE *fp = popen("pidof dosbox", "r");
  if (fp == NULL) {
    perror("popen");
    exit(1);
  }
  if (fscanf(fp, "%d", &pid) != 1) {
    fprintf(stderr, "cannot determine the pid of dosbox\n");
    pclose(fp);
    exit(1);
  }
  pclose(fp);

  snprintf(buf, sizeof buf, "/proc/%d/mem", pid);
  fd = open(buf, O_RDWR);
  if (fd < 0) {  // 0 is a valid descriptor; only negative values mean failure
    perror(buf);
    exit(1);
  }

  for (reset = true; !feof(stdin); ) {
    int val;
    printf("(DOSBox %d) ", pid);
    // %31s bounds the command word to buf[32].
    if (scanf("%31s", buf) <= 0) { close(fd); exit(0); }
    switch (buf[0]) {
      case 'q': close(fd); exit(0); break;
      case 's':
      case 'w':
        // Never act on val unless a number was really read.
        if (scanf("%d", &val) != 1) {
          fprintf(stderr, "expected a number after '%c'\n", buf[0]);
          break;
        }
        if (buf[0] == 's') scan(val); else overwrite(val);
        break;
      case 'r': reset = true; printf("Search results reset.\n"); break;
    }
  }
  return 0;
}

前互联网时代的神器 (2): 按键精灵

  • 大量重复固定的任务 (例如 2 秒 17 枪)

  • 这个简单,就是给进程发送键盘/鼠标事件

    • 做个驱动;或者

    • 利用操作系统/窗口管理器提供的 API

    • xdotool (我们用这玩意测试 vscode 的插件)

    • evdev (我们用这玩意显示按键;仅课堂展示有效)

前互联网时代的神器 (3): 变速齿轮

  • 调整游戏的逻辑更新速度

  • 本质是 “欺骗” 进程的时钟

    • 源头:闹钟、睡眠、gettimeofday

    • 拦截它们需要稍稍更复杂的技术

更强大的游戏外挂?

  • 游戏也是程序,也是状态机

    • 通过 API 调用 (和系统调用) 最终取得状态、修改状态

    • 想象成是一个 “为这个游戏专门设计的 gdb”

代码注入 (Hooking)

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <stdint.h>
#include <assert.h>
#include <unistd.h>

// Original implementation; prints its own name via __func__. main() passes it to DSU() as `old`.
void foo()     { printf("In old function %s\n", __func__); }
// Replacement implementation; prints its own name via __func__. main() passes it to DSU() as `new`.
void foo_new() { printf("In new function %s\n", __func__); }

/*
 * DSU - patch the code at `old` in place so that it jumps to `new`.
 *
 * The first 12 bytes at `old` are overwritten with this x86-64 sequence,
 * where the 8-byte immediate is the address `new`:
 *
 *   48 b8 <imm64>    movabs $imm64,%rax
 *   ff e0            jmpq   *%rax
 *
 * The page containing `old` and the page after it are made
 * read+write+execute for the duration of the patch and read+execute
 * afterwards (two pages, presumably so a patch that straddles a page
 * boundary is covered).
 *
 * If the page size cannot be obtained or the pages cannot be made writable,
 * the error is reported with perror() and nothing is patched.
 */
void DSU(void *old, void *new) {
  unsigned char *site = old;                    // byte pointer: no void* arithmetic
  uint64_t target = (uint64_t)(uintptr_t)new;   // exactly the 8 bytes of the immediate

  long pg = sysconf(_SC_PAGESIZE);
  if (pg <= 0) {
    perror("sysconf");
    return;
  }
  size_t pg_size = (size_t)pg;

  // Round down with the real page size instead of a hard-coded 4 KiB mask,
  // so the address handed to mprotect() is always page-aligned.
  unsigned char *pg_boundary =
      (unsigned char *)((uintptr_t)site & ~((uintptr_t)pg_size - 1));
  int flags = PROT_WRITE | PROT_READ | PROT_EXEC;

  printf("Dynamically updating... "); fflush(stdout);

  // Writing without write permission would fault, so stop if this fails.
  if (mprotect(pg_boundary, 2 * pg_size, flags) != 0) {
    perror("mprotect");
    return;
  }
  memcpy(site +  0, "\x48\xb8", 2);
  memcpy(site +  2,    &target, sizeof target);
  memcpy(site + 10, "\xff\xe0", 2);
  if (mprotect(pg_boundary, 2 * pg_size, flags & ~PROT_WRITE) != 0) {
    perror("mprotect");  // patch is in place; the pages merely stay writable
  }

  printf("Done"); fflush(stdout);
}

/*
 * Demo: call foo(), redirect it to foo_new() with DSU(), then call foo() again.
 */
int main() {
  foo();
  // ISO C has no implicit conversion from a function pointer to void *,
  // so the casts are spelled out.
  DSU((void *)foo, (void *)foo_new);
  foo();
  return 0;
}
1
2
3
./dsu
In old function foo
Dynamically updating... DoneIn new function foo_new
  • 原理同变速齿轮,修改代码,让代码跳转到自己写的部分去

  • 详细版本《软件动态更新技术》

  • The Dark Side

    • 对于外挂,代码可以静态/动态/vtable/DLL...注入

    • render(objects) → render_hacked(objects)

游戏外挂:攻与防

  • 控制/数据流完整性

    • 保护进程的完整性

      • 独立的进程/驱动做完整性验证
    • 保护隐私数据不被其他进程读写

      • 拦截向本进程的 ReadProcessMemory 和 WriteProcessMemory,发现后立即拒绝执行
    • 例子

  • 其他解决方法

    • AI 监控/社会工程学:如果你强得不正常,当然要盯上你

    • 云/沙盒 (Enclave) 渲染:“计算不再信任操作系统”

总结

总结

  • 本次课回答的问题
    • Q: 进程的地址空间是如何创建、如何更改的?

  • Take-away messages

    • 进程的地址空间

      • 能与文件关联的、带有访问权限的连续内存段
        • a.out, ld.so, libc.so, heap, stack, vdso
    • 进程地址空间的管理 API

      • mmap

声明:本文章引用资料与图像均已做标注,如有侵权本人会马上删除