ARM Cortex-M 上的 fault 想必大家都不陌生,我相信还没有谁从来没有遇到过 fault。
但出现fault后如何排查,相信很多人都是一筹莫展。
在我的项目中加了一些代码,Fault 后可以打印出更多的信息。
/*
 * Read-only views of the Cortex-M System Control Block fault registers.
 *
 * Each macro expands to a const volatile lvalue at a fixed address, so every
 * use of a macro performs a fresh read of the register.  The MFSR/BFSR/UFSR
 * macros are narrower windows onto the CFSR word at 0xE000ED28 (byte +0,
 * byte +1 and half-word +2 respectively).
 */
#define SCB_FAULT_REG(type, addr) (*(const volatile type *)(addr))

/* Full 32-bit registers. */
#define SCB_CFSR      SCB_FAULT_REG(unsigned, 0xE000ED28)       /* Configurable Fault Status Register */
#define SCB_HFSR      SCB_FAULT_REG(unsigned, 0xE000ED2C)       /* HardFault Status Register */
#define SCB_MMAR      SCB_FAULT_REG(unsigned, 0xE000ED34)       /* MemManage Fault Address Register */
#define SCB_BFAR      SCB_FAULT_REG(unsigned, 0xE000ED38)       /* Bus Fault Address Register */

/* Sub-fields of CFSR. */
#define SCB_CFSR_MFSR SCB_FAULT_REG(unsigned char, 0xE000ED28)  /* Memory-management Fault Status Register */
#define SCB_CFSR_BFSR SCB_FAULT_REG(unsigned char, 0xE000ED29)  /* Bus Fault Status Register */
#define SCB_CFSR_UFSR SCB_FAULT_REG(unsigned short, 0xE000ED2A) /* Usage Fault Status Register */
/**
 * Print the Usage Fault Status Register and the name of every flag set in it.
 *
 * Output is one header line, then the raw register value followed by the
 * names of the set flags, terminated by a newline.
 */
static void usage_fault_track(void)
{
    /* SCB_CFSR_UFSR is a volatile access; read it once so the raw value
       printed and the flags decoded below come from the same snapshot. */
    const unsigned short ufsr = SCB_CFSR_UFSR;

    rt_kprintf("usage fault:\n");
    rt_kprintf("SCB_CFSR_UFSR:0x%02X ", ufsr);

    if (ufsr & (1 << 0))
    {
        /* [0]:UNDEFINSTR */
        rt_kprintf("UNDEFINSTR ");
    }

    if (ufsr & (1 << 1))
    {
        /* [1]:INVSTATE */
        rt_kprintf("INVSTATE ");
    }

    if (ufsr & (1 << 2))
    {
        /* [2]:INVPC */
        rt_kprintf("INVPC ");
    }

    if (ufsr & (1 << 3))
    {
        /* [3]:NOCP */
        rt_kprintf("NOCP ");
    }

    if (ufsr & (1 << 8))
    {
        /* [8]:UNALIGNED */
        rt_kprintf("UNALIGNED ");
    }

    if (ufsr & (1 << 9))
    {
        /* [9]:DIVBYZERO */
        rt_kprintf("DIVBYZERO ");
    }

    rt_kprintf("\n");
}
static void bus_fault_track(void)
{
rt_kprintf("bus fault:
");
rt_kprintf("SCB_CFSR_BFSR:0x%02X ", SCB_CFSR_BFSR);
if(SCB_CFSR_BFSR & (1<<0))
{
/* [0]:IBUSERR */
rt_kprintf("IBUSERR ");
}
if(SCB_CFSR_BFSR & (1<<1))
{
/* [1]:PRECISERR */
rt_kprintf("PRECISERR ");
}
if(SCB_CFSR_BFSR & (1<<2))
{
/* [2]:IMPRECISERR */
rt_kprintf("IMPRECISERR ");
}
if(SCB_CFSR_BFSR & (1<<3))
{
/* [3]:UNSTKERR */
rt_kprintf("UNSTKERR ");
}
if(SCB_CFSR_BFSR & (1<<4))
{
/* [4]:STKERR */
rt_kprintf("STKERR ");
}
if(SCB_CFSR_BFSR & (1<<7))
{
rt_kprintf("SCB->BFAR:%08X
", SCB_BFAR);
}
else
{
rt_kprintf("
");
}
}
static void mem_manage_fault_track(void)
{
rt_kprintf("mem manage fault:
");
rt_kprintf("SCB_CFSR_MFSR:0x%02X ", SCB_CFSR_MFSR);
if(SCB_CFSR_MFSR & (1<<0))
{
/* [0]:IACCVIOL */
rt_kprintf("IACCVIOL ");
}
if(SCB_CFSR_MFSR & (1<<1))
{
/* [1]:DACCVIOL */
rt_kprintf("DACCVIOL ");
}
if(SCB_CFSR_MFSR & (1<<3))
{
/* [3]:MUNSTKERR */
rt_kprintf("MUNSTKERR ");
}
if(SCB_CFSR_MFSR & (1<<4))
{
/* [4]:MSTKERR */
rt_kprintf("MSTKERR ");
}
if(SCB_CFSR_MFSR & (1<<7))
{
/* [7]:MMARVALID */
rt_kprintf("SCB->MMAR:%08X
", SCB_MMAR);
}
else
{
rt_kprintf("
");
}
}
/**
 * Decode the HardFault Status Register and print the cause of the fault.
 *
 * For a FORCED hard fault the bus, memory-management and usage fault status
 * fields are inspected in turn, and the matching decoder is called for each
 * one that is non-zero.
 */
static void hard_fault_track(void)
{
    /* SCB_HFSR is a volatile access; read it once so all three tests below
       look at the same snapshot. */
    const unsigned hfsr = SCB_HFSR;

    if (hfsr & (1UL << 1))
    {
        /* [1]:VECTBL, Indicates hard fault is caused by failed vector fetch. */
        rt_kprintf("failed vector fetch\n");
    }

    if (hfsr & (1UL << 30))
    {
        /* [30]:FORCED, Indicates hard fault is taken because of bus fault,
           memory management fault, or usage fault. */
        if (SCB_CFSR_BFSR)
        {
            bus_fault_track();
        }

        if (SCB_CFSR_MFSR)
        {
            mem_manage_fault_track();
        }

        if (SCB_CFSR_UFSR)
        {
            usage_fault_track();
        }
    }

    if (hfsr & (1UL << 31))
    {
        /* [31]:DEBUGEVT, Indicates hard fault is triggered by debug event. */
        rt_kprintf("debug event\n");
    }
}
/**
* fault exception handling
*/
void rt_hw_hard_fault_exception(struct stack_context* contex)
{
rt_kprintf("psr: 0x%08x
", contex->psr);
rt_kprintf(" pc: 0x%08x
", contex->pc);
rt_kprintf(" lr: 0x%08x
", contex->lr);
rt_kprintf("r12: 0x%08x
", contex->r12);
rt_kprintf("r03: 0x%08x
", contex->r3);
rt_kprintf("r02: 0x%08x
", contex->r2);
rt_kprintf("r01: 0x%08x
", contex->r1);
rt_kprintf("r00: 0x%08x
", contex->r0);
hard_fault_track();
rt_kprintf("hard fault on thread: %s
", rt_current_thread->name);
#ifdef RT_USING_FINSH
list_thread();
#endif
while (1);
}
再写了两个测试代码,以手动触发fault。
/**
 * Deliberately trigger a divide-by-zero fault.
 *
 * Sets bit 4 (DIV_0_TRP) in SCB->CCR and then performs an integer division
 * by zero.  The final print is only reached if the division does not trap.
 */
void div0_test(void)
{
    volatile int *SCB_CCR = (volatile int *)0xE000ED14; /* SCB->CCR */
    /* volatile operands: the compiler must load both values and perform the
       division at run time instead of folding or discarding a constant
       division by zero (which is undefined behaviour in C). */
    volatile int x;
    volatile int y;
    int z;

    *SCB_CCR |= (1 << 4); /* bit4: DIV_0_TRP. */

    x = 10;
    y = 0;
    z = x / y;

    rt_kprintf("z:%d\n", z);
}
/**
 * Deliberately trigger an unaligned-access fault.
 *
 * Sets bit 3 (UNALIGN_TRP) in SCB->CCR, then reads an int from addresses
 * 0x00, 0x04 and 0x03 in that order, printing each address and value.
 * The first two addresses are 4-byte aligned; the last one is not.
 */
void unalign_test(void)
{
    volatile int *SCB_CCR = (volatile int *)0xE000ED14; /* SCB->CCR */
    volatile int *p;
    volatile int value;

    *SCB_CCR |= (1 << 3); /* bit3: UNALIGN_TRP. */

    /* aligned read */
    p = (int *)0x00;
    value = *p;
    rt_kprintf("addr:0x%02X value:0x%08X\n", (int)p, value);

    /* aligned read */
    p = (int *)0x04;
    value = *p;
    rt_kprintf("addr:0x%02X value:0x%08X\n", (int)p, value);

    /* unaligned read */
    p = (int *)0x03;
    value = *p;
    rt_kprintf("addr:0x%02X value:0x%08X\n", (int)p, value);
}
/* Export both test functions as FinSH shell commands when the shell is enabled. */
#ifdef RT_USING_FINSH
#include <finsh.h>
FINSH_FUNCTION_EXPORT(div0_test, div0_test)
FINSH_FUNCTION_EXPORT(unalign_test, unalign_test)
#endif /* RT_USING_FINSH */
1. 测试访问未授权区域

```
finsh>>int * p //声明一个指针变量
0, 0x00000000
finsh>>p = 0xDFFFFFF0 // 指向片上外设区结束处,一般不可能用完,所以此处一般不可访问。
-536870928, 0xdffffff0
finsh>>*p // 读取指针处数据
psr: 0x01000000
 pc: 0x00000e3e
 lr: 0x0000451d
r12: 0x00000000
r03: 0x00000000
r02: 0x1fff0180
r01: 0x1fff0814
r00: 0xdffffff0
bus fault:
SCB_CFSR_BFSR:0x82 PRECISERR SCB->BFAR:DFFFFFF0
hard fault on thread: tshell
thread pri status sp stack size max used left tick error
tidle 0x1f ready 0x00000040 0x00000100 0x00000060 0x00000015 000
tshell 0x14 ready 0x00000088 0x00000400 0x00000218 0x00000009 000
```

2. 非对齐访问测试

```
finsh>>unalign_test()
addr:0x00 value:0x20001B80
addr:0x04 value:0x0800DE81
psr: 0x21000000
r00: 0x00000000
r01: 0x40013800
r02: 0x20000690
r03: 0x00000000
r04: 0x00000003
r05: 0xe000ed14
r06: 0xdeadbeef
r07: 0x20002678
r08: 0xdeadbeef
r09: 0xdeadbeef
r10: 0xdeadbeef
r11: 0xdeadbeef
r12: 0x08000a95
 lr: 0x08002eb5
 pc: 0x08000386
usage fault:
SCB_CFSR_UFSR:0x100 UNALIGNED
hard fault on thread: tshell
thread pri status sp stack size max used left tick error
tidle 0x1f ready 0x00000040 0x00000100 0x0000005c 0x00000009 000
tshell 0x14 ready 0x00000088 0x00000800 0x000001b0 0x0000000a 000
led 0x14 suspend 0x00000078 0x00000200 0x00000078 0x00000005 000
```

3. 除零异常测试

```
finsh>>div0_test()
psr: 0x41000000
r00: 0x00000010
r01: 0x08000337
r02: 0x20000bb7
r03: 0x20000130
r04: 0xe000ed14
r05: 0x00000000
r06: 0xdeadbeef
r07: 0x0000000a
r08: 0xdeadbeef
r09: 0xdeadbeef
r10: 0xdeadbeef
r11: 0xdeadbeef
r12: 0x00000000
 lr: 0x08008d91
 pc: 0x08000348
usage fault:
SCB_CFSR_UFSR:0x200 DIVBYZERO
hard fault on thread: tshell
thread pri status sp stack size max used left tick error
tidle 0x1f ready 0x00000058 0x00000100 0x0000005c 0x0000000f 000
tshell 0x14 ready 0x00000088 0x00000800 0x000001b0 0x0000000a 000
led 0x14 suspend 0x00000078 0x00000200 0x00000078 0x00000005 000
```

## 问题追踪

上面测试出了问题,是我们人为设置的故障,但在平时调试中出了问题如何追踪呢?
以非对齐访问为例,开发环境使用MDK。

1. 进入JTAG仿真状态,并触发非对齐异常。
此时串口会打印出异常时的寄存器值,此时停止仿真器发现程序停在rt_hw_hard_fault_exception中。
2. 根据上面打印出来的寄存器,提取出关键值是 pc: 0x08000386
我们在MDK的command窗口中输入 `pc = 0x08000386`
![command.jpg](/uploads/414_526a14396ff2879eca9da075972f8b1c.jpg)
可以把PC指针临时设回问题发生时的场景,我们看到出现问题的指令是

```
   239:     p = (int *)0x03;
0x08000384 2403      MOVS     r4,#0x03
   240:     value = *p;
0x08000386 6820      LDR      r0,[r4,#0x00]
0x08000388 9000      STR      r0,[sp,#0x00]
```
分析 0x08000386 这条指令:它从 R4+0 的地址读取 4 字节到 R0 中,而先前打印出的 R4 的值为 r04: 0x00000003,因此可以确定这是地址不对齐造成的。
当然,具体情况要具体分析,有时候某个步骤出现问题并不会马上崩溃,
而是过一段时间以后才出问题,因此要结合上下文综合分析。
比如上面这个案例真正有问题的指令是 0x08000384。
根据以上的案例,并结合实际调试经验,相信大家可以更快地找出问题。
原作者:aozima
|