环境

Aarch64
Qemu
aarch64-linux-gnu-gcc
linux-4.14
 

概述

    栈回溯的目的是将函数的调用栈打印出来,对于分析函数调用和debug系统异常会很有帮助。对于Aarch64,x29用来当做帧指针,x30用来存放函数返回地址。
 

正文

原理

首先通过一个简单的程序分析一下栈回溯的原理,下面是测试程序:
 #include <stdio.h>

/* Innermost callee: prints a + b and returns its local a. */
int func3(int b)
{
	int a = 10;	/* 10 == 0xa, see "mov w0, #0xa" in the func3 disassembly */

	printf("a = %d\n", a + b);
	return a;
}

/* Calls func3(d), prints the result plus d, and returns the result. */
int func2(int d)
{
	int b;

	b = func3(d);
	printf("b = %d\n", b + d);
	return b;
}

/* Calls func2(a), prints the result, and returns it. */
int func1(int a)
{
	int d;

	d = func2(a);
	printf("d = %d\n", d);
	return d;
}

/* Entry point: starts the main -> func1 -> func2 -> func3 call chain. */
int main(int argc, const char *argv[])
{
	int a = 10;	/* "mov w0, #0xa" in the main disassembly */

	func1(a);
	return 0;	/* "mov w0, #0x0" in the main disassembly */
}

然后我们对其进行编译和反汇编:

aarch64-linux-gnu-gcc a.c -o main
aarch64-linux-gnu-objdump -D main > main.S
下面是main.S文件:
 000000000040055c <func3>:
40055c: a9bd7bfd stp x29, x30, [sp, #-]!
: 910003fd mov x29, sp
: b9001fa0 str w0, [x29, #]
: mov w0, #0xa // #
40056c: b9002fa0 str w0, [x29, #]
: b9402fa1 ldr w1, [x29, #]
: b9401fa0 ldr w0, [x29, #]
: 0b000021 add w1, w1, w0
40057c: adrp x0, <_init-0x3e8>
: 911b8000 add x0, x0, #0x6e0
: 97ffffb3 bl <printf@plt>
: b9402fa0 ldr w0, [x29, #]
40058c: a8c37bfd ldp x29, x30, [sp], #
: d65f03c0 ret <func2>:
: a9bd7bfd stp x29, x30, [sp, #-]!
: 910003fd mov x29, sp
40059c: b9001fa0 str w0, [x29, #]
4005a0: b9401fa0 ldr w0, [x29, #]
4005a4: 97ffffee bl 40055c <func3>
4005a8: b9002fa0 str w0, [x29, #]
4005ac: b9402fa1 ldr w1, [x29, #]
4005b0: b9401fa0 ldr w0, [x29, #]
4005b4: 0b000021 add w1, w1, w0
4005b8: adrp x0, <_init-0x3e8>
4005bc: 911ba000 add x0, x0, #0x6e8
4005c0: 97ffffa4 bl <printf@plt>
4005c4: b9402fa0 ldr w0, [x29, #]
4005c8: a8c37bfd ldp x29, x30, [sp], #
4005cc: d65f03c0 ret 00000000004005d0 <func1>:
4005d0: a9bd7bfd stp x29, x30, [sp, #-]!
4005d4: 910003fd mov x29, sp
4005d8: b9001fa0 str w0, [x29, #]
4005dc: b9401fa0 ldr w0, [x29, #]
4005e0: 97ffffed bl <func2>
4005e4: b9002fa0 str w0, [x29, #]
4005e8: adrp x0, <_init-0x3e8>
4005ec: 911bc000 add x0, x0, #0x6f0
4005f0: b9402fa1 ldr w1, [x29, #]
4005f4: 97ffff97 bl <printf@plt>
4005f8: b9402fa0 ldr w0, [x29, #]
4005fc: a8c37bfd ldp x29, x30, [sp], #
: d65f03c0 ret <main>:
: a9bd7bfd stp x29, x30, [sp, #-]!
: 910003fd mov x29, sp
40060c: b9001fa0 str w0, [x29, #]
: f9000ba1 str x1, [x29, #]
: mov w0, #0xa // #
: b9002fa0 str w0, [x29, #]
40061c: b9402fa0 ldr w0, [x29, #]
: 97ffffec bl 4005d0 <func1>
: mov w0, #0x0 // #
: a8c37bfd ldp x29, x30, [sp], #
40062c: d65f03c0 ret
 
main:
第50行,将main函数的返回地址和上级函数的栈底存入main函数的栈底,剩余的main栈用于存放main的局部变量
第59行,执行完毕后,x30中存放的是main的返回地址,x29指向的是上一级函数的栈底
func1:
第35行,将func1函数的返回地址和main函数的栈底存入func1函数的栈底,剩余的func1栈用于存放func1的局部变量
第46行,执行完毕后,x30中存放的是func1的返回地址,即第58行,x29指向的是main函数的栈底
func2:
第18行,将func2函数的返回地址和func1函数的栈底存入func2函数的栈底,剩余的func2栈用于存放func2的局部变量
第31行,执行完毕后,x30中存放的是func2的返回地址,即第40行,x29指向的是func1函数的栈底
func3:
第2行,将func3函数的返回地址和func2函数的栈底存入func3函数的栈底,剩余的func3栈用于存放func3的局部变量
第14行,执行完毕后,x30中存放的是func3的返回地址,即第23行,x29指向的是func2函数的栈底
 
最终可以得到下面的示意图:
 
所以,在函数func3中,通过上面的结构就可以从func3回溯到main函数。
gcc提供了编译选项-fomit-frame-pointer和-fno-omit-frame-pointer,如果在编译时指定了-fomit-frame-pointer,那么就没有帧指针了,所以也就无法通过帧指针进行栈回溯了;指定-fno-omit-frame-pointer则会保留帧指针。默认有帧指针。

使用API进行栈回溯

在用户空间提供了回溯用的API:
#include <execinfo.h>
int backtrace(void **buffer, int size);
char **backtrace_symbols(void *const *buffer, int size);

在func3使用上面的两个函数回溯一下:

 #include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Capacity of the return-address buffer. The size used in the original
 * listing was lost; any value not smaller than the call depth works
 * (NOTE(review): 100 mirrors the glibc backtrace(3) example).
 */
#define BT_BUF_SIZE 100

/*
 * Prints the current call stack with backtrace()/backtrace_symbols(),
 * then prints a + b and returns a.
 */
int func3(int b)
{
	int a = 10, n, i;	/* NOTE(review): a = 10 as in the first test program */
	void *buffer[BT_BUF_SIZE];
	char **strings;

	n = backtrace(buffer, BT_BUF_SIZE);
	strings = backtrace_symbols(buffer, n);
	if (strings != NULL) {
		for (i = 0; i < n; i++)
			printf("%s\n", strings[i]);
		/* backtrace_symbols() returns malloc()ed memory owned by the caller */
		free(strings);
	}

	printf("a = %d\n", a + b);

	return a;
}
 

编译:

aarch64-linux-gnu-gcc -funwind-tables -rdynamic -O0 -g a.c -o main
 
运行:
[root@aarch64 ~]# ./main
./main(func3+0x20) [0x400a1c]
./main(func2+0x14) [0x400aa4]
./main(func1+0x14) [0x400ae0]
./main(main+0x20) [0x400b20]
/lib/libc.so.(__libc_start_main+0xec) [0xffffb8d732c8]
a =
b =
d =
 

使用内联汇编进行栈回溯

如果不使用上面的API的话,也可以用访问寄存器的方式来完成:
 #include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Capacity of the return-address buffer. The size used in the original
 * listing was lost; any value not smaller than the call depth works
 * (NOTE(review): 100 mirrors the glibc backtrace(3) example).
 */
#define BT_BUF_SIZE 100

/* Layout of the {x29, x30} pair that each function prologue stores via stp. */
typedef struct {
	unsigned long x29;	/* saved frame pointer of the caller */
	unsigned long x30;	/* saved return address */
} node_t;

/*
 * Walks the frame-pointer chain starting from the current x29 and prints
 * every saved return address. Stops at the first record whose x29 or x30
 * is zero. AArch64 only: reads x29 through inline assembly.
 */
void back(void)
{
	node_t *addr;

	printf("\nBackstrace not use api:\n");

	__asm__ volatile("mov %0, x29\n\t" : "=r"(addr) : : );
	while (addr && addr->x30 && addr->x29) {
		/* %p requires a void * argument */
		printf("\t%p\n", (void *)addr->x30);
		addr = (node_t *)addr->x29;
	}
}

/*
 * Prints the call stack twice (execinfo API, then back()), prints a + b
 * and returns a.
 */
int func3(int b)
{
	int a = 10, n, i;	/* NOTE(review): a = 10 as in the first test program */
	void *buffer[BT_BUF_SIZE];
	char **strings;

	n = backtrace(buffer, BT_BUF_SIZE);
	strings = backtrace_symbols(buffer, n);

	printf("\nBackstrace use api:\n");
	if (strings != NULL) {
		for (i = 0; i < n; i++)
			printf("\t%s\n", strings[i]);
		/* backtrace_symbols() returns malloc()ed memory owned by the caller */
		free(strings);
	}

	back();

	printf("a = %d\n", a + b);
	return a;
}
 
下面是运行结果:
[root@aarch64 ~]# ./main

Backstrace use api:
./main(func3+0x20) [0x400ab4]
./main(func2+0x14) [0x400b54]
./main(func1+0x14) [0x400b90]
./main(main+0x20) [0x400bd0]
/lib/libc.so.(__libc_start_main+0xec) [0xffff993d12c8] Backstrace not use api:
0x400b1c
0x400b54
0x400b90
0x400bd0
0xffff993d12c8
a =
b =
d =
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 

使用GCC的内部函数进行栈回溯

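gcc提供了两个内置的函数用来获取函数的返回地址和帧指针的值:
void *__builtin_return_address(unsigned int level);
void *__builtin_frame_address(unsigned int level);
其中level为0表示当前函数。下面用这两个内置函数实现一下回溯: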
 #include <execinfo.h>
#include <stdio.h>
#include <stdlib.h>

/*
 * Capacity of the return-address buffer. The size used in the original
 * listing was lost; any value not smaller than the call depth works
 * (NOTE(review): 100 mirrors the glibc backtrace(3) example).
 */
#define BT_BUF_SIZE 100

/* Layout of the {x29, x30} pair that each function prologue stores via stp. */
typedef struct {
	unsigned long x29;	/* saved frame pointer of the caller */
	unsigned long x30;	/* saved return address */
} node_t;

/*
 * Walks the frame-pointer chain starting from the current x29 (read with
 * inline assembly, AArch64 only) and prints every saved return address.
 */
void back(void)
{
	node_t *addr;

	printf("\nBackstrace not use api:\n");

	__asm__ volatile("mov %0, x29\n\t" : "=r"(addr) : : );
	while (addr && addr->x30 && addr->x29) {
		/* %p requires a void * argument */
		printf("\t%p\n", (void *)addr->x30);
		addr = (node_t *)addr->x29;
	}
}

/*
 * Same walk as back(), but the starting frame record comes from
 * __builtin_frame_address(0) instead of inline assembly.
 */
void back_builtin(void)
{
	node_t *addr;

	printf("\nBackstrace using builtin func:\n");

	/* level 0 == this function; matches the first address of the walk below */
	printf("the return address of back_builtin: %p\n",
	       __builtin_return_address(0));
	addr = __builtin_frame_address(0);
	while (addr && addr->x30 && addr->x29) {
		printf("\t%p\n", (void *)addr->x30);
		addr = (node_t *)addr->x29;
	}
}

/*
 * Prints the call stack three ways (execinfo API, back(), back_builtin()),
 * prints a + b and returns a.
 */
int func3(int b)
{
	int a = 10, n, i;	/* NOTE(review): a = 10 as in the first test program */
	void *buffer[BT_BUF_SIZE];
	char **strings;

	n = backtrace(buffer, BT_BUF_SIZE);
	strings = backtrace_symbols(buffer, n);

	printf("\nBackstrace use api:\n");
	if (strings != NULL) {
		for (i = 0; i < n; i++)
			printf("\t%s\n", strings[i]);
		/* backtrace_symbols() returns malloc()ed memory owned by the caller */
		free(strings);
	}

	back();

	back_builtin();

	printf("a = %d\n", a + b);
	return a;
}
 
 
运行结果:
[root@aarch64 ~]# ./main 

Backstrace use api:
./main(func3+0x20) [0x400b74]
./main(func2+0x14) [0x400c18]
./main(func1+0x14) [0x400c54]
./main(main+0x20) [0x400c94]
/lib/libc.so.(__libc_start_main+0xec) [0xffffb92632c8] Backstrace not use api:
0x400bdc
0x400c18
0x400c54
0x400c94
0xffffb92632c8 Backstrace using builtin func:
the return address of back_builtin: 0x400be0
0x400be0
0x400c18
0x400c54
0x400c94
0xffffb92632c8
a =
b =
d =

不使用API进行栈回溯的缺点是只能打印出地址,但是却无法打印出具体的符号名字。

 

内核的栈回溯

在Linux内核中可以使用WARN_ON来输出当前的函数调用栈。下面是一个测试程序输出的log:
[   20.419376] ------------[ cut here ]------------
[ 20.420062] WARNING: CPU: PID: at /home/pengdonglin/disk_ext/Qemu/aarch64/demo_driver/demo.c: demo_init+0xc/0x1000 [demo]
[ 20.420546] Modules linked in: demo(O+)
[ 20.421004] CPU: PID: Comm: insmod Tainted: G O 4.14.-ga06114e5 #
[ 20.421371] Hardware name: linux,dummy-virt (DT)
[ 20.421600] task: ffff80000831af00 task.stack: ffff00000aef8000
[ 20.421899] PC is at demo_init+0xc/0x1000 [demo]
[ 20.422142] LR is at do_one_initcall+0x44/0x130
[ 20.422360] pc : [<ffff000000c5500c>] lr : [<ffff000008083cc4>] pstate:
[ 20.422657] sp : ffff00000aefbc40
[ 20.422853] x29: ffff00000aefbc40 x28: ffff000000c520d0
[ 20.423140] x27: 00000000014000c0 x26: ffff80000836d800
[ 20.423367] x25: ffff0000081b3848 x24: ffff800079596080
[ 20.423570] x23: x22: ffff800079596200
[ 20.423787] x21: ffff80000831af00 x20:
[ 20.423989] x19: ffff000000c55000 x18:
[ 20.424189] x17: x16:
[ 20.424390] x15: ffffffffffffffff x14: ffffffffffffffff
[ 20.424591] x13: ffffffffffffffff x12: 00000000a49cf051
[ 20.424791] x11: ffff80000831b7f0 x10:
[ 20.425019] x9 : ffff00000aefba10 x8 : ffff0000091fc000
[ 20.425227] x7 : ffff000008246790 x6 :
[ 20.425428] x5 : x4 :
[ 20.425629] x3 : x2 :
[ 20.425828] x1 : ffff80000831af00 x0 :
[ 20.426106]
[ 20.426106] X1: 0xffff80000831ae80:
[ 20.426292] ae80
[ 20.426774] aea0
[ 20.427177] aec0
[ 20.427568] aee0
[ 20.427973] af00 ffffffff ffffffff
[ 20.428389] af20 0aef8000 ffff0000
[ 20.428899] af40 fffeee58 7c082f00 ffff8000
[ 20.429415] af60 08c60b88 ffff0000
[ 20.429962]
[ 20.429962] X11: 0xffff80000831b770:
[ 20.430199] b770
[ 20.430706] b790 00000a50 082c2790 ffff0000 080816dc ffff0000
[ 20.431223] b7b0 00000a4f 00000a50 080ed6bc ffff0000 08081f18 ffff0000
[ 20.431743] b7d0 000009b8
[ 20.432262] b7f0 08175f18 ffff0000 09198b88 ffff0000
[ 20.432779] b810 000c0001 683da59d 64dfae01 08175c7c ffff0000 09198b88 ffff0000
[ 20.433297] b830 000c0001 4ccad968 18f18caf 08175e48 ffff0000
[ 20.433813] b850 09198cb8 ffff0000 000c0005 e69cf65f 335c3458
[ 20.434334]
[ 20.434334] X21: 0xffff80000831ae80:
[ 20.434561] ae80
[ 20.435073] aea0
[ 20.435598] aec0
[ 20.436228] aee0
[ 20.436756] af00 ffffffff ffffffff
[ 20.437271] af20 0aef8000 ffff0000
[ 20.437789] af40 fffeee58 7c082f00 ffff8000
[ 20.438466] af60 08c60b88 ffff0000
[ 20.439138]
[ 20.439138] X22: 0xffff800079596180:
[ 20.439386] 746f6e2e 6e672e65 75622e75 2d646c69
[ 20.440006] 61a0
[ 20.440529] 61c0
[ 20.441098] 61e0
[ 20.441608] ffff8000 6f6d6564 00c55000 ffff0000
[ 20.442065]
[ 20.442529]
[ 20.443078]
[ 20.443652]
[ 20.443652] X24: 0xffff800079596000:
[ 20.443856] 7274732e
[ 20.444321]
[ 20.444809]
[ 20.445273]
[ 20.445707] ffff8000 7a34a700 ffff8000
[ 20.446208] 60a0 09e94940 ffff0000 00c51000 ffff0000
[ 20.446775] 60c0 081b3720 ffff0000
[ 20.447339] 60e0
[ 20.447919]
[ 20.447919] X26: 0xffff80000836d780:
[ 20.448134] d780
[ 20.448637] d7a0
[ 20.449144] d7c0
[ 20.449641] d7e0
[ 20.450148] d800 08eee848 ffff0000 0836dca8 ffff8000
[ 20.450646] d820 0000000d 7a34a700 ffff8000
[ 20.451149] d840 09e94938 ffff0000 081b3848 ffff0000
[ 20.451652] d860 7a34a700 ffff8000
[ 20.452138]
[ 20.452334] Call trace:
[ 20.452522] Exception stack(0xffff00000aefbb00 to 0xffff00000aefbc40)
[ 20.452823] bb00: ffff80000831af00
[ 20.453076] bb20: ffff000008246790
[ 20.453321] bb40: ffff0000091fc000 ffff00000aefba10 ffff80000831b7f0
[ 20.453544] bb60: 00000000a49cf051 ffffffffffffffff ffffffffffffffff ffffffffffffffff
[ 20.453762] bb80: ffff000000c55000
[ 20.453979] bba0: ffff80000831af00 ffff800079596200
[ 20.454199] bbc0: ffff800079596080 ffff0000081b3848 ffff80000836d800 00000000014000c0
[ 20.454419] bbe0: ffff000000c520d0 ffff00000aefbc40 ffff000008083cc4 ffff00000aefbc40
[ 20.454639] bc00: ffff000000c5500c ffff0000081b951c
[ 20.454880] bc20: 0000ffffffffffff ffff00000818d420 ffff00000aefbc40 ffff000000c5500c
[ 20.455190] [<ffff000000c5500c>] demo_init+0xc/0x1000 [demo]
[ 20.455419] [<ffff000008083cc4>] do_one_initcall+0x44/0x130
[ 20.455645] [<ffff0000081b9548>] do_init_module+0x64/0x1d4
[ 20.455859] [<ffff0000081b809c>] load_module+0x1e1c/0x24f0
[ 20.456096] [<ffff0000081b88f0>] SyS_init_module+0x180/0x218
[ 20.456299] Exception stack(0xffff00000aefbec0 to 0xffff00000aefc000)
[ 20.456522] bec0: 0000ffffa3ec4010 000000000003b850 00000000005cad68
[ 20.456776] bee0: 00000000ffffffff 000000001bd34680 00000000005e2bf8
[ 20.457026] bf00: 00000000005e4dc0
[ 20.457288] bf20: 00000000005e9000 00000000005e3000
[ 20.457539] bf40:
[ 20.457791] bf60: 0000fffff0d52878 0000fffff0d52880
[ 20.458052] bf80: 0000fffff0d52888 00000000005cad68 00000000005b9b24
[ 20.458295] bfa0: 0000fffff0d52720 000000000045bc74 0000fffff0d52440
[ 20.458546] bfc0: 0000000000409bf8 0000ffffa3ec4010
[ 20.458776] bfe0:
[ 20.459002] [<ffff000008083ac0>] el0_svc_naked+0x34/0x38
[ 20.459211] ---[ end trace 1d225ceb44d51601 ]---

分析:

include/asm-generic/bug.h:

/*
 * WARN_ON(condition): normalises condition to 0/1, calls __WARN() when it
 * is non-zero, and evaluates to that 0/1 value (wrapped in unlikely()).
 * condition is evaluated exactly once.
 */
#define WARN_ON(condition) ({                                           \
int __ret_warn_on = !!(condition); \
if (unlikely(__ret_warn_on)) \
__WARN(); \
unlikely(__ret_warn_on); \
})

当给WARN_ON传递非0的参数时,就会调用__WARN()打印栈:

 /* Defined in include/asm-generic/bug.h */
#define __WARN()		__WARN_TAINT(TAINT_WARN)	/* TAINT_WARN is 9 */
#define __WARN_TAINT(taint)	__WARN_FLAGS(BUGFLAG_TAINT(taint))

/* Defined in arch/arm64/include/asm/bug.h */

/* here flags is 0x900 */
#define __WARN_FLAGS(flags)	__BUG_FLAGS(BUGFLAG_WARNING|(flags))

/* here flags is 0x901 */
#define __BUG_FLAGS(flags)				\
	asm volatile (__stringify(ASM_BUG_FLAGS(flags)));

#define _BUGVERBOSE_LOCATION(file, line) __BUGVERBOSE_LOCATION(file, line)

/*
 * Emits the file name into .rodata.str (local label 2) and then, back in
 * the bug table entry, its offset relative to label 0 plus the line number.
 */
#define __BUGVERBOSE_LOCATION(file, line)			\
		.pushsection .rodata.str,"aMS",@progbits,1;	\
	2:	.string file;					\
		.popsection;					\
								\
		.long 2b - 0b;					\
		.short line;

/*
 * Adds one entry to __bug_table. Label 0 is the entry itself, label 1 is
 * the instruction that follows the macro (the brk in ASM_BUG_FLAGS), so
 * "1f - 0b" records where the trapping instruction is.
 */
#define __BUG_ENTRY(flags) 				\
		.pushsection __bug_table,"aw";		\
		.align 2;				\
	0:	.long 1f - 0b;				\
_BUGVERBOSE_LOCATION(__FILE__, __LINE__)		\
		.short flags; 				\
		.popsection;				\
	1:

/* Bug table entry followed by the trapping brk instruction. */
#define ASM_BUG_FLAGS(flags)				\
	__BUG_ENTRY(flags)				\
	brk	BUG_BRK_IMM

第29行,在__bug_table段中增加一项,其中存放的数据(如brk指令的地址,以及文件名和行号等)在异常处理程序中会被访问,可以参考find_bug函数

第30行的brk BUG_BRK_IMM中的宏BUG_BRK_IMM定义如下:
/*
* #imm16 values used for BRK instruction generation
* Allowed values for kgdb are 0x400 - 0x7ff
* 0x100: for triggering a fault on purpose (reserved)
* 0x400: for dynamic BRK instruction
* 0x401: for compile time BRK instruction
* 0x800: kernel-mode BUG() and WARN() traps
*/
#define FAULT_BRK_IMM 0x100
#define KGDB_DYN_DBG_BRK_IMM 0x400
#define KGDB_COMPILED_DBG_BRK_IMM 0x401
#define BUG_BRK_IMM 0x800

也就是在WARN_ON的最后会执行brk 0x800,这条指令会触发一个debug同步异常:

Linux内核的异常入口在arch/arm64/kernel/entry.S中,这里对应的是el1_sync:
 /*
 * EL1 mode handlers.
 */
	.align	6
el1_sync:
	kernel_entry 1
	mrs	x1, esr_el1			// read the syndrome register
	lsr	x24, x1, #ESR_ELx_EC_SHIFT	// exception class
	cmp	x24, #ESR_ELx_EC_DABT_CUR	// data abort in EL1
	b.eq	el1_da
	cmp	x24, #ESR_ELx_EC_IABT_CUR	// instruction abort in EL1
	b.eq	el1_ia
	cmp	x24, #ESR_ELx_EC_SYS64		// configurable trap
	b.eq	el1_undef
	cmp	x24, #ESR_ELx_EC_SP_ALIGN	// stack alignment exception
	b.eq	el1_sp_pc
	cmp	x24, #ESR_ELx_EC_PC_ALIGN	// pc alignment exception
	b.eq	el1_sp_pc
	cmp	x24, #ESR_ELx_EC_UNKNOWN	// unknown exception in EL1
	b.eq	el1_undef
	cmp	x24, #ESR_ELx_EC_BREAKPT_CUR	// debug exception in EL1
	b.ge	el1_dbg
	b	el1_inv

	/* ... ... (code between el1_sync and el1_dbg elided in this excerpt) */

el1_dbg:
	/*
	 * Debug exception handling
	 */
	cmp	x24, #ESR_ELx_EC_BRK64		// if BRK64
	cinc	x24, x24, eq			// set bit '0'
	tbz	x24, #0, el1_inv		// EL1 only
	mrs	x0, far_el1
	mov	x2, sp				// struct pt_regs
	bl	do_debug_exception
	kernel_exit 1
 
 
第36行,调用do_debug_exception时,x0存放的是异常指令的地址,x1存放的是ESR_EL1,其中存放了异常的类型,x2存放的是struct pt_regs的首地址,在kernel_entry宏中会将通用寄存器的值存放到其中,struct pt_regs的定义如下:
 /*
 * This struct defines the way the registers are stored on the stack during an
 * exception. Note that sizeof(struct pt_regs) has to be a multiple of 16 (for
 * stack alignment). struct user_pt_regs must form a prefix of struct pt_regs.
 */
struct pt_regs {
	union {
		struct user_pt_regs user_regs;
		struct {
			/* x0..x30 at the time of the exception */
			u64 regs[31];
			/*
			 * Stack pointer of the interrupted context. In the
			 * WARN_ON log above it equals the END address of the
			 * dumped "Exception stack" range (i.e. of this
			 * pt_regs), not its start.
			 */
			u64 sp;
			/*
			 * Address of the instruction that took the exception
			 * (bug_handler later adds AARCH64_INSN_SIZE so that
			 * execution resumes at the next instruction).
			 */
			u64 pc;
			/* PSTATE at the time of the exception */
			u64 pstate;
		};
	};
	u64 orig_x0;
#ifdef __AARCH64EB__
	u32 unused2;
	s32 syscallno;
#else
	s32 syscallno;
	u32 unused2;
#endif

	u64 orig_addr_limit;	/* backup of the interrupted task's thread_info.addr_limit */
	u64 unused;	// maintain 16 byte alignment
	/* frame record filled by kernel_entry: {x29, elr_el1}, or zeros for EL0 */
	u64 stackframe[2];
};
 
这里重点关注一下pt_regs的stackframe[2],它用于将进程栈和异常栈连接在一起,这样就可以从异常栈里一直回溯到进程栈。连接过程是在arch/arm64/kernel/entry.S中:
 mrs	x22, elr_el1
	mrs	x23, spsr_el1
	stp	lr, x21, [sp, #S_LR]

	/*
	 * In order to be able to dump the contents of struct pt_regs at the
	 * time the exception was taken (in case we attempt to walk the call
	 * stack later), chain it together with the stack frames.
	 */
	.if \el == 0
	stp	xzr, xzr, [sp, #S_STACKFRAME]	// from EL0: zero record ends the chain
	.else
	stp	x29, x22, [sp, #S_STACKFRAME]	// from EL1: {old x29, elr_el1}
	.endif
	add	x29, sp, #S_STACKFRAME		// x29 now points at pt_regs.stackframe

下面分析do_debug_exception:

 /*
 * C entry point for debug exceptions. Looks up the handler for the event
 * encoded in esr in debug_fault_info[] and calls it.
 *
 * Returns 1 when the handler returned 0 (handled); otherwise reports the
 * exception through arm64_notify_die() and returns 0.
 */
asmlinkage int __exception do_debug_exception(unsigned long addr,
					      unsigned int esr,
					      struct pt_regs *regs)
{
	const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr);
	struct siginfo info;
	int rv;

	/*
	 * Tell lockdep we disabled irqs in entry.S. Do nothing if they were
	 * already disabled to preserve the last enabled/disabled addresses.
	 */
	if (interrupts_enabled(regs))
		trace_hardirqs_off();

	if (user_mode(regs) && instruction_pointer(regs) > TASK_SIZE)
		arm64_apply_bp_hardening();

	if (!inf->fn(addr, esr, regs)) {
		rv = 1;
	} else {
		pr_alert("Unhandled debug exception: %s (0x%08x) at 0x%016lx\n",
			 inf->name, esr, addr);

		info.si_signo = inf->sig;
		info.si_errno = 0;
		info.si_code  = inf->code;
		info.si_addr  = (void __user *)addr;
		arm64_notify_die("", regs, &info, 0);
		rv = 0;
	}

	if (interrupts_enabled(regs))
		trace_hardirqs_on();

	return rv;
}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
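第5行,esr中异常类型(EC)字段的值是0x3C(即ESR_ELx_EC_BRK64),所以DBG_ESR_EVT(esr)的值就是(0x3C>>1)&0x7 = 6,对应debug_fault_info数组中下标为6的那一项: { early_brk64, SIGTRAP, TRAP_BRKPT, "aarch64 BRK" }
第19行,执行该项的处理函数。early_brk64只在debug_traps_init()执行之前使用,之后该项会被替换成brk_handler(后面完整的栈回溯信息中出现的就是brk_handler),两者最终都会调用到bug_handler。这里以early_brk64为例: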
 
 /*
* Initial handler for AArch64 BRK exceptions
* This handler only used until debug_traps_init().
*
* Forwards regs and esr to bug_handler(); addr is not used here.
* Returns 0 when bug_handler() reports DBG_HOOK_HANDLED and 1 otherwise.
*/
int __init early_brk64(unsigned long addr, unsigned int esr,
struct pt_regs *regs)
{
return bug_handler(regs, esr) != DBG_HOOK_HANDLED;
}
接着又调用了bug_handler
 /*
 * Handles the BRK raised by BUG()/WARN() in kernel mode.
 *
 * Returns DBG_HOOK_ERROR for user-mode exceptions and for addresses that
 * report_bug() does not classify as BUG or WARN; otherwise advances
 * regs->pc past the BRK and returns DBG_HOOK_HANDLED.
 */
static int bug_handler(struct pt_regs *regs, unsigned int esr)
{
	if (user_mode(regs))
		return DBG_HOOK_ERROR;

	switch (report_bug(regs->pc, regs)) {
	case BUG_TRAP_TYPE_BUG:
		die("Oops - BUG", regs, 0);
		break;

	case BUG_TRAP_TYPE_WARN:
		break;

	default:
		/* unknown/unrecognised bug trap type */
		return DBG_HOOK_ERROR;
	}

	/* If thread survives, skip over the BUG instruction and continue: */
	regs->pc += AARCH64_INSN_SIZE;	/* skip BRK and resume */
	return DBG_HOOK_HANDLED;
}
 
第3行,检查异常发生时是否是EL0,如果是的话,直接返回,即这个函数只能被特权模式调用
第6行,调用report_bug,将异常指令的地址和pt_regs传递给它
下面分析report_bug:
 /*
 * Looks up the bug table entry for bugaddr and reports it.
 *
 * Returns BUG_TRAP_TYPE_NONE when bugaddr is not a valid bug address or has
 * no table entry, BUG_TRAP_TYPE_WARN for WARN-type entries (after calling
 * __warn(), unless a "once" entry was already reported), and
 * BUG_TRAP_TYPE_BUG otherwise.
 */
enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
{
	struct bug_entry *bug;
	const char *file;
	unsigned line, warning, once, done;

	if (!is_valid_bugaddr(bugaddr))
		return BUG_TRAP_TYPE_NONE;

	bug = find_bug(bugaddr);
	if (!bug)
		return BUG_TRAP_TYPE_NONE;

	file = NULL;
	line = 0;
	warning = 0;

	if (bug) {
#ifdef CONFIG_DEBUG_BUGVERBOSE
#ifndef CONFIG_GENERIC_BUG_RELATIVE_POINTERS
		file = bug->file;
#else
		file = (const char *)bug + bug->file_disp;
#endif
		line = bug->line;
#endif
		warning = (bug->flags & BUGFLAG_WARNING) != 0;
		once = (bug->flags & BUGFLAG_ONCE) != 0;
		done = (bug->flags & BUGFLAG_DONE) != 0;

		if (warning && once) {
			if (done)
				return BUG_TRAP_TYPE_WARN;

			/*
			 * Since this is the only store, concurrency is not an issue.
			 */
			bug->flags |= BUGFLAG_DONE;
		}
	}

	if (warning) {
		/* this is a WARN_ON rather than BUG/BUG_ON */
		__warn(file, line, (void *)bugaddr, BUG_GET_TAINT(bug), regs,
		       NULL);
		return BUG_TRAP_TYPE_WARN;
	}

	printk(KERN_DEFAULT "------------[ cut here ]------------\n");

	if (file)
		pr_crit("kernel BUG at %s:%u!\n", file, line);
	else
		pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n",
			(void *)bugaddr);

	return BUG_TRAP_TYPE_BUG;
}

第10行,在__bug_table中找到bugaddr对应的那一项bug_entry,每个WARN_ON都会增加一项,这个函数首先在kernel的__bug_table段进行搜索,如果没有找到的话,就会在module的bug_table中进行搜索,这个很好理解,如果是静态编译到内核里的,那么就会在kernel的__bug_table里找到,如果编译到了内核模块中,那么就会在module的bug_table中找到。可以参考前面对__BUG_ENTRY的分析

第27行,bug->flags的值是0x901,所以warning是1,once和done都是0
第44行,调用__warn,file表示文件名,line表示行号,bugaddr表示brk指令的地址,regs为pt_regs
 
下面分析__warn:
 /*
 * Prints the WARNING banner (CPU, PID, optional file:line, caller), the
 * optional formatted message in args, the module list, and then either the
 * register dump (regs != NULL) or a plain stack dump; finally taints the
 * kernel with 'taint'. Panics instead when panic_on_warn is set.
 */
void __warn(const char *file, int line, void *caller, unsigned taint,
	    struct pt_regs *regs, struct warn_args *args)
{
	disable_trace_on_warning();

	pr_warn("------------[ cut here ]------------\n");

	if (file)
		pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
			raw_smp_processor_id(), current->pid, file, line,
			caller);
	else
		pr_warn("WARNING: CPU: %d PID: %d at %pS\n",
			raw_smp_processor_id(), current->pid, caller);

	if (args)
		vprintk(args->fmt, args->args);

	if (panic_on_warn) {
		/*
		 * This thread may hit another WARN() in the panic path.
		 * Resetting this prevents additional WARN() from panicking the
		 * system on this thread.  Other threads are blocked by the
		 * panic_mutex in panic().
		 */
		panic_on_warn = 0;
		panic("panic_on_warn set ...\n");
	}

	print_modules();

	if (regs)
		show_regs(regs);
	else
		dump_stack();

	print_oops_end_marker();

	/* Just a warning, don't kill lockdep. */
	add_taint(taint, LOCKDEP_STILL_OK);
}
 
 
第8~14,对应上面内核log的第2行,输出当前的cpu编号,当前的进程号,WARN_ON所在的文件名和行号,以及caller,即WARN_ON在被调用函数中的位置,就是demo_init+0xc/0x1000 [demo]
第16~17,输出args中的内容,这里是NULL,如果没有定义__WARN_TAINT,并且使用的是__WARN_printf,那么这里的args就不是NULL了。参考include/asm-generic/bug.h
第19~28,panic_on_warn表示发生调用__warn时是否触发panic。可以通过修改/proc/sys/kernel/panic_on_warn来改变panic_on_warn的值
第30行,输出module信息,对应内核log的第3行:demo(O+)。 其中demo表示module的名字,'O'表示TAINT_OOT_MODULE,'+'表示模块正在被加载,'-'表示模块正在被卸载
第33行,调用show_regs输出寄存器和栈信息,定义在arch/arm64/kernel/process.c中:
 /*
 * Dumps the saved register state with __show_regs() and then the call trace
 * with dump_backtrace(); the task argument is passed as NULL.
 */
void show_regs(struct pt_regs * regs)
{
__show_regs(regs);
dump_backtrace(regs, NULL);
}
 
下面分析一下__show_regs和dump_backtrace。
__show_regs:
 /*
 * Prints PC/LR (symbolised), pc/lr/pstate/sp and the general purpose
 * registers from top_reg down to x0, two per line. For kernel-mode
 * exceptions it also dumps memory around register values via
 * show_extra_register_data().
 */
void __show_regs(struct pt_regs *regs)
{
	int i, top_reg;
	u64 lr, sp;

	if (compat_user_mode(regs)) {
		lr = regs->compat_lr;
		sp = regs->compat_sp;
		top_reg = 12;		/* only x0..x12 are printed for compat tasks */
	} else {
		lr = regs->regs[30];	/* x30 is the link register */
		sp = regs->sp;
		top_reg = 29;		/* the log above starts at x29 */
	}

	show_regs_print_info(KERN_DEFAULT);
	print_symbol("PC is at %s\n", instruction_pointer(regs));
	print_symbol("LR is at %s\n", lr);
	printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n",
	       regs->pc, lr, regs->pstate);
	printk("sp : %016llx\n", sp);

	i = top_reg;

	while (i >= 0) {
		printk("x%-2d: %016llx ", i, regs->regs[i]);
		i--;

		/* pair an odd register with the even one below it on the same line */
		if (i % 2 == 0) {
			pr_cont("x%-2d: %016llx ", i, regs->regs[i]);
			i--;
		}

		pr_cont("\n");
	}
	if (!user_mode(regs))
		/*
		 * NOTE(review): 128 is reconstructed from the log above, where
		 * the dump for X1 (0x...af00) starts at 0x...ae80, i.e. 0x80
		 * bytes below the register value - confirm against the tree.
		 */
		show_extra_register_data(regs, 128);
	printk("\n");
}
 
 
第6~14,判断是不是compat_user_mode,即发生异常时被打断的是否是以AArch32状态运行的用户程序(32位兼容模式)。AArch32和AArch64的寄存器之间需要做一下映射。参考arch/arm64/include/asm/ptrace.h:
 /* Architecturally defined mapping between AArch32 and AArch64 registers */
/* Each compat_* name expands to a slot of regs[], so it is used as regs->compat_lr etc. */
#define compat_fp regs[11]
#define compat_sp regs[13]
#define compat_lr regs[14]
 
 
第17行,对应的就是log里的第7行:PC is at demo_init+0xc/0x1000 [demo],print_symbol函数将地址转换为内核的符号,这个后面分析。
第25~35,输出通用寄存器信息, 对于Aarch32切到Aarch64的情况,只输出x0~x12寄存器的值。这里可以学习一下printk和pr_cont的用法。
第36~37,如果发生异常时处于特权模式的话,还会找出其中含有有效地址的寄存器,并输出这些地址周围的内存内容
 
dump_backtrace:
 /*
 * Prints the call trace of tsk (current when tsk is NULL).
 *
 * When regs is given, frames are skipped until the one whose frame pointer
 * equals regs->regs[29]; printing then starts with regs->pc. Whenever the
 * unwound pc lies in entry text, the pt_regs that contains the frame record
 * is dumped as "Exception stack".
 */
void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
{
	struct stackframe frame;
	int skip;

	pr_debug("%s(regs = %p tsk = %p)\n", __func__, regs, tsk);

	if (!tsk)
		tsk = current;

	if (!try_get_task_stack(tsk))
		return;

	if (tsk == current) {
		frame.fp = (unsigned long)__builtin_frame_address(0);
		frame.pc = (unsigned long)dump_backtrace;
	} else {
		/*
		 * task blocked in __switch_to
		 */
		frame.fp = thread_saved_fp(tsk);
		frame.pc = thread_saved_pc(tsk);
	}
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
	frame.graph = tsk->curr_ret_stack;
#endif

	skip = !!regs;
	printk("Call trace:\n");
	while (1) {
		unsigned long stack;
		int ret;

		/* skip until specified stack frame */
		if (!skip) {
			dump_backtrace_entry(frame.pc);
		} else if (frame.fp == regs->regs[29]) {
			skip = 0;
			/*
			 * Mostly, this is the case where this function is
			 * called in panic/abort. As exception handler's
			 * stack frame does not contain the corresponding pc
			 * at which an exception has taken place, use regs->pc
			 * instead.
			 */
			dump_backtrace_entry(regs->pc);
		}
		ret = unwind_frame(tsk, &frame);
		if (ret < 0)
			break;
		if (in_entry_text(frame.pc)) {
			/* frame.fp points at pt_regs.stackframe; step back to the pt_regs start */
			stack = frame.fp - offsetof(struct pt_regs, stackframe);

			if (on_accessible_stack(tsk, stack))
				dump_mem("", "Exception stack", stack,
					 stack + sizeof(struct pt_regs));
		}
	}

	put_task_stack(tsk);
}
 
第15行,获取当前帧寄存器x29的值
第28行,skip为1
第30~58,栈回溯。第37行控制输出栈回溯信息的起始点,如果从regs->pc开始全部输出栈回溯信息的话,会得到如下的栈信息:
[   16.097764] [<ffff000000c5500c>] demo_init+0xc/0x1000 [demo]
[ 16.098018] [<ffff00000808642c>] show_regs+0x2c/0x38
[ 16.098212] [<ffff0000080e4a7c>] __warn+0xb4/0x118
[ 16.098410] [<ffff000008c1c37c>] report_bug+0xbc/0x140
[ 16.098681] [<ffff00000808c034>] bug_handler.part.+0x24/0x78
[ 16.098893] [<ffff00000808c0c4>] bug_handler+0x3c/0x48
[ 16.099087] [<ffff0000080847f0>] brk_handler+0xe0/0x1a0
[ 16.099289] [<ffff0000080816bc>] do_debug_exception+0xa4/0x15c
[ 16.102487] [<ffff0000080830f0>] el1_dbg+0x18/0x74
[ 16.102699] [<ffff000000c5500c>] demo_init+0xc/0x1000 [demo]
[ 16.102915] [<ffff000008083cc4>] do_one_initcall+0x44/0x130
[ 16.103125] [<ffff0000081b9548>] do_init_module+0x64/0x1d4
[ 16.103322] [<ffff0000081b809c>] load_module+0x1e1c/0x24f0
[ 16.103535] [<ffff0000081b88f0>] SyS_init_module+0x180/0x218

上面的栈回溯信息中,我们只关心从demo_init开始的:

[   16.102699] [<ffff000000c5500c>] demo_init+0xc/0x1000 [demo]
[ 16.102915] [<ffff000008083cc4>] do_one_initcall+0x44/0x130
[ 16.103125] [<ffff0000081b9548>] do_init_module+0x64/0x1d4
[ 16.103322] [<ffff0000081b809c>] load_module+0x1e1c/0x24f0
[ 16.103535] [<ffff0000081b88f0>] SyS_init_module+0x180/0x218
 
 
这就是第37行的作用。这里还应该注意之前说的,将异常栈和进程栈连接起来,实现从异常栈一直回溯到进程栈。
 
dump_backtrace_entry函数定义如下:
 /* Prints one call-trace line for the code address 'where' via print_ip_sym(). */
static void dump_backtrace_entry(unsigned long where)
{
/*
* Note that 'where' can have a physical address, but it's not handled.
*/
print_ip_sym(where);
}
/* Prints ip twice on one line: as %p inside "[<...>]" and again as %pS. */
static inline void print_ip_sym(unsigned long ip)
{
printk("[<%p>] %pS\n", (void *) ip, (void *) ip);
}

上面%pS的作用就是将地址转换成内核符号。

第51~57,输出异常栈信息,主要就是struct pt_regs的内容:
 
完。
 
 
 
 

最新文章

  1. MySQL 5.6 记录 SQL 语句与慢查询
  2. 在filter中使用spring的service bean
  3. luogg_java学习_04_数组
  4. 9.请写出PHP5权限控制修饰符
  5. Rhel6-torque作业调度系统配置文档
  6. 灰度图像的自动阈值分割(Otsu 法)(转载)
  7. SQLserver临时表
  8. 经典sql(3)
  9. robotium和appium的一些区别
  10. python 二分查找法
  11. E - Emptying the Baltic Kattis - emptyingbaltic (dijkstra堆优化)
  12. 【阅读笔记】《C程序员 从校园到职场》第七章 指针和结构体
  13. 【Python】【jupyter-notebook】
  14. power designer 从sqlserver数据库获取字段说明&amp;导出rtf文档模板
  15. java获取当月的第一天和最后一天,获取本周的第一天和最后一天
  16. emlog 百度熊掌号提交插件-基于Emlog6.0.1特别版美化
  17. 关于变长数组的一点小想法-C语言定义数组但是数组长度不确定怎么办
  18. JS JavaScript深拷贝、浅拷贝
  19. 安装arch系统时,把ubuntu的efi分区格式化
  20. 【dfs】【高斯消元】【异或方程组】bzoj1770 [Usaco2009 Nov]lights 燈 / bzoj2466 [中山市选2009]树

热门文章

  1. 常见网页编辑器(富文本,Markdown,代码编辑等)
  2. uname 命令简介
  3. 使用helm管理复杂kubernetes应用
  4. 【ASP.NET Core分布式项目实战】(六)Gitlab安装
  5. Windows彻底卸载VMWare虚拟机详细步骤
  6. 【计算机视觉】BRIEF特征匹配
  7. jdk 1.6 新特性
  8. [转帖]如何获得一个Oracle RAC数据库(从Github - oracle/vagrant-boxes) --- 暂时未测试成功 公司网络太差了..
  9. .NET CORE 中的缓存使用
  10. Codeforces 878 E. Numbers on the blackboard