最近调试STM32的TCP服务端程序,遇到了想不明白的问题,具体过程如下:
使用的是rt-thread4.1.0的内核;
通过串口和esp32-c3通信;
启用at_socket;
建立tcp服务器;
与wifi中其他客户端通信。
总体过程如上。刚开始调试时发现esp32的软件包没有实现服务端,于是我先用esp8266的软件包测试,一切OK,tcp通信很好;然后我把缺失的驱动代码从esp8266的软件包复制到esp32的软件包中,结果就出现了下面的问题:
psr: 0x61000000
r00: 0x08024e30
r01: 0x00000000
r02: 0x20006590
r03: 0x00000000
r04: 0x2000b198
r05: 0x2000a768
r06: 0x200001cc
r07: 0x08026080
r08: 0x08025a10
r09: 0x0000000d
r10: 0x00000000
r11: 0x00000000
r12: 0x00000000
lr: 0x0800addd
pc: 0x0800adec
hard fault on thread: at_clnt
如上,我后面多次调试,确认问题点在at_socket.c中的函数at_connect_notice_cb中,原代码如下:
#ifdef AT_USING_SOCKET_SERVER
/* Disabled: using this temporary function pointer led to a hard fault
 * (see the matching notes inside at_connect_notice_cb below). */
// static int (*store_at_socket_temporary)(struct at_device *device, enum at_socket_type type);

/**
 * Callback for AT_SOCKET_EVT_CONNECTED: registers an incoming connection.
 *
 * Parses the base socket number from buff ("SOCKET:<n>"), allocates a new
 * AT socket on the AF_AT device, stores the base socket number in its
 * user_data, then queues a "SOCKET:<new socket>" packet on the listening
 * socket's receive list and signals it so that "accept" wakes up.
 *
 * @param sock   must be RT_NULL (asserted)
 * @param event  must be AT_SOCKET_EVT_CONNECTED (asserted)
 * @param buff   notice text of the form "SOCKET:<n>"; only read here, never freed
 * @param bfsz   size of buff; not used by this function
 */
static void at_connect_notice_cb(struct at_socket *sock, at_socket_evt_t event, const char *buff, size_t bfsz)
{
    RT_ASSERT(buff);
    RT_ASSERT(sock == RT_NULL);
    RT_ASSERT(event == AT_SOCKET_EVT_CONNECTED);

    extern struct netdev *netdev_default;
    struct netdev *netdev = RT_NULL;
    struct at_device *device = RT_NULL;
    struct at_socket *new_sock = RT_NULL;
    rt_base_t level;
    rt_slist_t *node = RT_NULL;
    struct at_socket *at_sock = RT_NULL;
    char *socket_info = RT_NULL;
    int base_socket = 0;

    /* parse the notice first so that malformed input never allocates anything */
    if (sscanf(buff, "SOCKET:%d", &base_socket) != 1)
    {
        LOG_D("ACCEPT: malformed connect notice");
        return;
    }
    LOG_D("ACCEPT BASE SOCKET: %d", base_socket);

    if (netdev_default && netdev_is_up(netdev_default) &&
        netdev_family_get(netdev_default) == AF_AT)
    {
        netdev = netdev_default;
    }
    else
    {
        /* get network interface device by protocol family AF_AT */
        netdev = netdev_get_by_family(AF_AT);
        if (netdev == RT_NULL)
        {
            return;
        }
    }

    device = at_device_get_by_name(AT_DEVICE_NAMETYPE_NETDEV, netdev->name);
    if (device == RT_NULL)
    {
        return;
    }

    /* allocate the packet buffer before the socket, so an out-of-memory
     * condition cannot leave a freshly allocated socket behind */
    socket_info = rt_malloc(AT_SOCKET_INFO_LEN);
    if (socket_info == RT_NULL)
    {
        return;
    }
    rt_memset(socket_info, 0, AT_SOCKET_INFO_LEN);

    /* avoid use bottom driver to alloc "socket" */
    /* Disabled to work around the hard fault: */
    // store_at_socket_temporary = device->class->socket_ops->at_socket;
    // device->class->socket_ops->at_socket = RT_NULL;
    new_sock = alloc_socket_by_device(device, AT_SOCKET_TCP);
    if (new_sock == RT_NULL)
    {
        rt_free(socket_info);
        return;
    }
    new_sock->type = AT_SOCKET_TCP;
    new_sock->state = AT_SOCKET_CONNECT;

    /* set AT socket receive data callback function */
    new_sock->ops->at_set_event_cb(AT_SOCKET_EVT_RECV, at_recv_notice_cb);
    new_sock->ops->at_set_event_cb(AT_SOCKET_EVT_CLOSED, at_closed_notice_cb);
    new_sock->ops->at_set_event_cb(AT_SOCKET_EVT_CONNECTED, at_connect_notice_cb);
    /* Disabled to work around the hard fault: */
    // device->class->socket_ops->at_socket = store_at_socket_temporary;

    /* remember which base socket this accepted socket maps to;
     * go through a pointer-sized integer instead of casting int directly */
    new_sock->user_data = (void *)(rt_base_t)base_socket;

    /* put incoming "socket" to the listen socket receiver packet list */
    rt_sprintf(socket_info, "SOCKET:%d", new_sock->socket);

    /* find out the listen socket */
    level = rt_hw_interrupt_disable();
    rt_slist_for_each(node, &_socket_list)
    {
        at_sock = rt_slist_entry(node, struct at_socket, list);
        if (at_sock && at_sock->magic == AT_SOCKET_MAGIC && at_sock->listen.is_listen == RT_TRUE)
        {
            break;
        }
        at_sock = RT_NULL;
    }
    rt_hw_interrupt_enable(level);

    if (at_sock == RT_NULL)
    {
        /* NOTE(review): new_sock stays allocated on this path; no socket
         * release helper is visible in this excerpt - confirm and release it. */
        rt_free(socket_info);
        return;
    }

    /* wakeup the "accept" function */
    rt_mutex_take(at_sock->recv_lock, RT_WAITING_FOREVER);
    if (at_recvpkt_put(&(at_sock->recvpkt_list), socket_info, AT_SOCKET_INFO_LEN) != RT_EOK)
    {
        /* the packet was not queued: release our own buffer, not the
         * caller's buff, which this function does not own */
        rt_free(socket_info);
        rt_mutex_release(at_sock->recv_lock);
        return;
    }
    /* socket_info is now held by the receive list
     * (presumably freed when the packet is consumed - TODO confirm) */
    rt_mutex_release(at_sock->recv_lock);
    rt_sem_release(at_sock->recv_notice);

    at_do_event_changes(at_sock, AT_EVENT_RECV, RT_TRUE);
}
#endif
问题就出在我注释掉的代码中:只要启用与变量store_at_socket_temporary相关的语句,每次程序执行到下面这条语句
new_sock = alloc_socket_by_device(device, AT_SOCKET_TCP);
就会导致hard fault,始终没想明白什么原因,于是注释掉了和这个变量相关的语句,一切正常运行。
我后期又通过vscode对比了esp8266和esp32相关的3个驱动文件,除了有几处at指令语句因为esp at固件不同,稍微差几个字,以及变量名一个是esp8266开头,一个是esp32开头,其他的都一模一样。
大家有明白原因的吗,虽然问题解决了,但是百思不得其解啊!