93c703689cfe527165688487545dcbc8
代码
视频:【基于米尔MYD-YT153MX-MINI 开发板的SPeex+KWS语音关键字识别Demo】 https://www.bilibili.com/video/B ... 80acfe41f622b10a844
https://mp.weixin.qq.com/s/NPqtvhQdKIfkYTfVho43Ow
前言
前面我们移植了Speex和ML-KWS-for-MCU,测试跑通了对应的代码。
对于真实应用场景我们需要实时采集音频数据进行处理识别,由于本MINI板只有LINE OUT语音输出,
没有音频输入接口,USB声卡也没有默认驱动支持,所以暂时先录一段wav音频,导入到开发板进行测试。
后续只需要把从wav音频文件取数据改为实时采集音频输入数据即可。
准备测试音频
模型支持"yes","no","up","down","left","right","on","off","stop","go"这几个关键字,
我们就录制一段包含这些关键字的语音,并转为单声道、16bit的wav音频文件。
这里使用Audacity录制
菜单栏:播录->录制->录制
文件->导出音频->导出到计算机
导出为单声道 16bit
然后音频文件导入到开发板
- scp kws_test.wav root@169.254.79.46:/root/
复制代码
测试Demo程序
speex_kws.c中
- #ifdef HAVE_CONFIG_H
- #include "config.h"
- #endif
- #include "speex/speex_echo.h"
- #include "speex/speex_preprocess.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <stdint.h>
- #include <time.h>
/*
 * Return the current CLOCK_MONOTONIC time in microseconds.
 *
 * Returns 0 if clock_gettime() fails, so the timespec is never read
 * uninitialized; a caller computing a difference of two readings then
 * sees an elapsed time of 0.
 */
static uint64_t get_tm_us(void)
{
    struct timespec ts = {0};

    if (clock_gettime(CLOCK_MONOTONIC, &ts) != 0) {
        return 0;
    }
    return (uint64_t)ts.tv_sec * (uint64_t)1000000
         + (uint64_t)ts.tv_nsec / (uint64_t)1000;
}
/* WAV parsing */
#define CHUNK_RIFF "RIFF"
#define CHUNK_WAVE "WAVE"
#define CHUNK_FMT "fmt "
#define CHUNK_DATA "data"

/* Number of header bytes wav_decode_head() is allowed to inspect. */
#define WAV_HEAD_SIZE 44u

typedef struct
{
    uint32_t off;           /* byte offset, from the start of the header buffer, of the first data byte */
    uint32_t chunksize;     /* size field of the RIFF chunk */
    uint16_t audioformat;   /* fmt: format tag (0x0001 PCM or 0xFFFE extensible are accepted) */
    uint16_t numchannels;   /* fmt: channel count */
    uint32_t samplerate;    /* fmt: sample rate */
    uint32_t byterate;      /* fmt: byte rate */
    uint16_t blockalign;    /* fmt: block alignment */
    uint16_t bitspersample; /* fmt: bits per sample */
    uint32_t datasize;      /* size field of the data chunk */
}wav_t;

/* Read a little-endian 16-bit value from p[0..1]. */
static uint16_t wav_rd_le16(const uint8_t *p)
{
    return (uint16_t)((uint16_t)p[0] | ((uint16_t)p[1] << 8));
}

/* Read a little-endian 32-bit value from p[0..3]. */
static uint32_t wav_rd_le32(const uint8_t *p)
{
    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
}

/*
 * Bounded WAV header parser: never reads at or beyond buffer[buflen].
 *
 * Walks the RIFF chunks, fills the format fields from "fmt " and stops at
 * the header of the "data" chunk. Chunks other than "fmt " and "data" are
 * skipped, including the pad byte that follows an odd-sized chunk.
 *
 * Returns:
 *    0  success, every field of *wav is set
 *   -1  buffer too short for a RIFF header, or no "RIFF" tag
 *   -2  no "WAVE" tag
 *   -3  no "data" chunk header inside the available bytes
 *   -4  "fmt " chunk missing before "data", malformed, or of an
 *       unsupported format (only 0x0001 and 0xFFFE are accepted)
 */
static int wav_decode_head_n(const uint8_t *buffer, uint32_t buflen, wav_t *wav)
{
    uint32_t limit;
    uint32_t pos;
    int have_fmt = 0;

    if ((buflen < 12u) || (0 != memcmp(buffer, CHUNK_RIFF, 4)))
    {
        return -1;
    }
    wav->chunksize = wav_rd_le32(buffer + 4);
    if (0 != memcmp(buffer + 8, CHUNK_WAVE, 4))
    {
        return -2;
    }

    /* The walk stops at the end of the RIFF chunk or of the buffer,
     * whichever comes first. */
    limit = (wav->chunksize > UINT32_MAX - 8u) ? UINT32_MAX : (wav->chunksize + 8u);
    if (limit > buflen)
    {
        limit = buflen;
    }

    pos = 12u;
    while ((pos < limit) && ((limit - pos) >= 8u))
    {
        const uint8_t *ck = buffer + pos;
        uint32_t cksize = wav_rd_le32(ck + 4);
        uint32_t avail;

        pos += 8u;
        avail = limit - pos;

        if (0 == memcmp(ck, CHUNK_DATA, 4))
        {
            if (!have_fmt)
            {
                return -4; /* format fields would be undefined */
            }
            wav->datasize = cksize;
            wav->off = pos;
            return 0;
        }

        if (0 == memcmp(ck, CHUNK_FMT, 4))
        {
            const uint8_t *f = buffer + pos;

            if (cksize < 16u)
            {
                return -4;
            }
            if (avail < 16u)
            {
                return -3;
            }
            wav->audioformat = wav_rd_le16(f);
            if ((wav->audioformat != 0x0001) && (wav->audioformat != 0xFFFE))
            {
                return -4;
            }
            wav->numchannels   = wav_rd_le16(f + 2);
            wav->samplerate    = wav_rd_le32(f + 4);
            wav->byterate      = wav_rd_le32(f + 8);
            wav->blockalign    = wav_rd_le16(f + 12);
            wav->bitspersample = wav_rd_le16(f + 14);
            if ((wav->numchannels == 0u) || (wav->blockalign == 0u))
            {
                return -4; /* callers divide by blockalign */
            }
            have_fmt = 1;
            /* Any extension bytes (cbSize, SubFormat, ...) are skipped below
             * together with the rest of the chunk. */
        }

        /* Skip the chunk payload and its pad byte, staying inside the buffer. */
        if (cksize > avail)
        {
            return -3;
        }
        pos += cksize;
        if ((cksize & 1u) && (pos < limit))
        {
            pos++;
        }
    }
    return -3;
}

/*
 * Parse a WAV header held in buffer and fill *wav.
 *
 * buffer must hold at least WAV_HEAD_SIZE (44) bytes; only those bytes are
 * inspected, so a file whose "data" chunk header does not lie within the
 * first 44 bytes is rejected with -3 instead of being read out of bounds.
 * Use wav_decode_head_n() directly when a longer header has been loaded.
 *
 * Returns 0 on success or a negative code (see wav_decode_head_n()).
 */
static int wav_decode_head(uint8_t* buffer, wav_t* wav)
{
    return wav_decode_head_n(buffer, WAV_HEAD_SIZE, wav);
}
/* Store the low 16 bits of v little-endian at p[0..1]. */
static void wav_put_le16(uint8_t *p, uint32_t v)
{
    p[0] = (uint8_t)(v & 0xFFu);
    p[1] = (uint8_t)((v >> 8) & 0xFFu);
}

/* Store v little-endian at p[0..3]. */
static void wav_put_le32(uint8_t *p, uint32_t v)
{
    p[0] = (uint8_t)(v & 0xFFu);
    p[1] = (uint8_t)((v >> 8) & 0xFFu);
    p[2] = (uint8_t)((v >> 16) & 0xFFu);
    p[3] = (uint8_t)((v >> 24) & 0xFFu);
}

/*
 * Fill a 44-byte WAV header describing 16-bit PCM audio.
 *
 * buffer:  destination, at least 44 writable bytes
 * samples: number of samples per channel
 * chnum:   number of channels
 * freq:    sample rate
 *
 * Sizes are computed in unsigned 32-bit arithmetic as samples*chnum*2, so
 * long recordings no longer overflow a signed int in a "*16/8" intermediate.
 */
static void wav_fill_head(uint8_t* buffer, int samples, int chnum, int freq)
{
    const uint32_t blockalign = (uint32_t)chnum * 2u;          /* bytes per sample frame */
    const uint32_t datalen    = (uint32_t)samples * blockalign; /* payload bytes */
    const uint32_t byterate   = (uint32_t)freq * blockalign;
    const uint32_t chunksize  = (44u - 8u) + datalen;
    uint8_t *p = buffer;

    p[0] = 'R'; p[1] = 'I'; p[2] = 'F'; p[3] = 'F';
    wav_put_le32(p + 4, chunksize);
    p[8] = 'W'; p[9] = 'A'; p[10] = 'V'; p[11] = 'E';

    p[12] = 'f'; p[13] = 'm'; p[14] = 't'; p[15] = ' ';
    wav_put_le32(p + 16, 16u);                 /* Subchunk1Size */
    wav_put_le16(p + 20, 1u);                  /* AudioFormat: PCM */
    wav_put_le16(p + 22, (uint32_t)chnum);     /* NumChannels */
    wav_put_le32(p + 24, (uint32_t)freq);      /* SampleRate */
    wav_put_le32(p + 28, byterate);            /* ByteRate */
    wav_put_le16(p + 32, blockalign);          /* BlockAlign */
    wav_put_le16(p + 34, 16u);                 /* BitsPerSample */

    p[36] = 'd'; p[37] = 'a'; p[38] = 't'; p[39] = 'a';
    wav_put_le32(p + 40, datalen);
}
- void wav_print(wav_t* wav)
- {
- printf("off:%d\r\n",wav->off);
- printf("chunksize:%d\r\n",wav->chunksize);
- printf("audioformat:%d\r\n",wav->audioformat);
- printf("numchannels:%d\r\n",wav->numchannels);
- printf("samplerate:%d\r\n",wav->samplerate);
- printf("byterate:%d\r\n",wav->byterate);
- printf("blockalign:%d\r\n",wav->blockalign);
- printf("bitspersample:%d\r\n",wav->bitspersample);
- printf("datasize:%d\r\n",wav->datasize);
- }
- #define NN 128 /* samples per frame: Speex frame size and length of mic_buf/spk_buf/out_buf */
- #define TAIL 1024 /* second argument of speex_echo_state_init() (echo filter length) */
- #define AUDIO_SAMPLE_RATE 16000u /* samples in each half of the KWS buffer */
- static int16_t* s_kws_buffer = 0; /* heap buffer of 2 * AUDIO_SAMPLE_RATE samples, allocated in main() */
- volatile static int s_kws_done_flag = 1; /* not referenced anywhere in the visible code */
- volatile static int s_kws_bufferlen = 0; /* bytes already stored in the half being filled */
- volatile static int s_kws_pingpong = 0; /* index (0 or 1) of the half being filled */
- extern int kws_run(int16_t *in, int* percent); /* defined elsewhere; main() uses the result as an index into output_class and *percent as a percentage */
- int main(int argc, char **argv) /* Demo entry point: decodes the WAV header of argv[1], configures Speex, then runs kws_run() on each filled half of s_kws_buffer. */
- {
- int kws_rec_flag = 0;
- FILE *mic_fd;
- short mic_buf[NN];
- short spk_buf[NN];
- short out_buf[NN];
- uint8_t mic_wav_buf[44]; /* buffer for the header of the input mic WAV file */
- wav_t mic_wav;
- int samps; /* number of samples */
- int times; /* number of reads */
- SpeexEchoState *st;
- SpeexPreprocessState *den;
- int sampleRate;
- char* mic_fname = argv[1];
- int ctl_i;
- float ctl_f;
- if (argc != 2)
- {
- fprintf(stderr, "speex_kws mic.wav\n");
- exit(1);
- }
- memset(spk_buf, 0, sizeof(spk_buf));
- mic_fd = fopen(mic_fname, "rb");
- if(mic_fd == NULL){
- fprintf(stderr, "open file %s err\n",mic_fname);
- exit(1);
- }else{
- fprintf(stderr, "open file %s ok\n",mic_fname);
- }
- if(44 != fread(mic_wav_buf, 1, 44, mic_fd)){
- fprintf(stderr, "read file %s err\n",mic_fname);
- fclose(mic_fd);
- exit(1);
- }else{
- fprintf(stderr, "read file %s ok\n",mic_fname);
- }
- if(0 != wav_decode_head(mic_wav_buf, &mic_wav)){
- fprintf(stderr, "decode file %s err\n",mic_fname);
- fclose(mic_fd);
- exit(1);
- }else{
- fprintf(stderr, "decode file %s ok\n",mic_fname);
- }
- printf("[mic_wav]\r\n");
- wav_print(&mic_wav);
- samps = mic_wav.datasize;
- samps /= mic_wav.blockalign; /* sample count = data size divided by blockalign */
- printf("\r\nsamps:%d\r\n",samps);
- sampleRate = mic_wav.samplerate;
- st = speex_echo_state_init(NN, TAIL);
- den = speex_preprocess_state_init(NN, sampleRate);
- speex_echo_ctl(st, SPEEX_ECHO_SET_SAMPLING_RATE, &sampleRate);
- ctl_i=1;
- speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_DENOISE, &ctl_i); /* denoise: ctl_i=1 on, 0 off */
- ctl_i=80;
- speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_NOISE_SUPPRESS, &ctl_i);
- ctl_i=80;
- speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_ECHO_SUPPRESS, &ctl_i);
- ctl_i=1;
- speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_VAD, &ctl_i); /* VAD: 1 enable, 0 disable */
- ctl_i=95;
- speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_PROB_START, &ctl_i); /* 0~100: threshold for going from silence to active */
- ctl_i=90;
- speex_preprocess_ctl(den, SPEEX_PREPROCESS_SET_PROB_CONTINUE, &ctl_i); /* 0~100: threshold for staying active */
- int speex_vad_flag_pre = 0;
- times = samps / NN; /* read NN samples at a time, times reads in total */
- s_kws_buffer = malloc(AUDIO_SAMPLE_RATE*sizeof(int16_t) * 2); /* two 1-second halves (ping-pong); NOTE(review): result is not checked for NULL */
- for(int i=0; i= NN * sizeof(int16_t) ? NN * sizeof(int16_t) : space; /* NOTE(review): this line is truncated in the listing. Everything between `i` and `= NN` is missing: the loop condition and increment, presumably the per-frame read and Speex processing, and the declarations of maxlen, space and torcv. Restore it from the original source before compiling. */
- memcpy(s_kws_buffer+s_kws_pingpong*maxlen/sizeof(int16_t)+s_kws_bufferlen/sizeof(int16_t), mic_buf, torcv); /* append torcv bytes of mic_buf to the half being filled */
- s_kws_bufferlen += torcv;
- if(s_kws_bufferlen >= maxlen){
- //xprintf("buffer %d done %d\r\n",s_kws_pingpong,xTaskGetTickCount());
- s_kws_bufferlen = 0;
- s_kws_pingpong ^= 0x01; /* switch to the other half */
- kws_rec_flag = 0;
- 
- /* a semaphore would be posted to a processing task here; this demo processes inline */
- int16_t* buffer; /* points at the half that was just completed */
- if(s_kws_pingpong == 0){
- buffer = s_kws_buffer + AUDIO_SAMPLE_RATE;
- }else{
- buffer = s_kws_buffer;
- }
- volatile uint64_t t0;
- volatile uint64_t t1;
- int percent;
- t0 = get_tm_us();
- int res = kws_run(buffer, &percent);
- t1 = get_tm_us();
- printf("used:%luuS\r\n",(t1-t0)); /* NOTE(review): %lu only matches uint64_t where unsigned long is 64-bit; a 32-bit target needs PRIu64 */
- char output_class[12][8] = {"Silence", "Unknown","yes","no","up","down","left","right","on","off","stop","go"};
- printf("Detected %s (%d%%)\r\n",output_class[res],percent); /* NOTE(review): res is not range-checked before indexing */
- }
- }
- }
- speex_echo_state_destroy(st);
- speex_preprocess_state_destroy(den);
- fclose(mic_fd);
- free(s_kws_buffer);
- return 0;
- }
复制代码
编译脚本
#! /bin/sh
# Cross-compile the KWS C++ sources and the Speex/CMSIS/demo C sources for
# arm-linux-gnueabihf, link everything statically into ./speex_kws, then
# remove the object files.

# Stop at the first failing step so a failed compile is never linked.
set -e

INCLUDES="-Iinclude -I. -Ikws/CMSIS/NN/Include -Ikws/CMSIS/DSP/Include -Ikws/Source/NN/ -Ikws/Source/MFCC/ -Ikws/Source/local_NN/ -Ikws/Source/NN/DNN/ -Ikws/Source/NN/DS_CNN/ -Ikws/Source/KWS/KWS_DS_CNN/ -Ikws/Source/KWS/"
DEFS="-DHAVE_CONFIG_H"

# C++ sources (KWS, MFCC, NN). The tool name must be lower-case.
arm-linux-gnueabihf-g++ -c -O3 -Wno-narrowing -fpermissive $INCLUDES $DEFS \
kws/Source/KWS/*.cpp \
kws/Source/KWS/KWS_DNN/*.cpp \
kws/Source/KWS/KWS_DS_CNN/*.cpp \
kws/Source/MFCC/*.cpp \
kws/Source/NN/*.cpp \
kws/Source/NN/DNN/*.cpp \
kws/Source/NN/DS_CNN/*.cpp \
kws/simple_test/*.cpp

# C and assembly sources (speexdsp, the demo, CMSIS NN/DSP).
arm-linux-gnueabihf-gcc -c -O3 $INCLUDES $DEFS \
libspeexdsp/*.c \
speex_kws.c \
kws/Source/local_NN/*.c \
kws/CMSIS/NN/Source/ActivationFunctions/*.c \
kws/CMSIS/NN/Source/ConvolutionFunctions/*.c \
kws/CMSIS/NN/Source/FullyConnectedFunctions/*.c \
kws/CMSIS/NN/Source/NNSupportFunctions/*.c \
kws/CMSIS/NN/Source/PoolingFunctions/*.c \
kws/CMSIS/NN/Source/SoftmaxFunctions/*.c \
kws/CMSIS/DSP/Source/BasicMathFunctions/*.c \
kws/CMSIS/DSP/Source/CommonTables/*.c \
kws/CMSIS/DSP/Source/ComplexMathFunctions/*.c \
kws/CMSIS/DSP/Source/ControllerFunctions/*.c \
kws/CMSIS/DSP/Source/FastMathFunctions/*.c \
kws/CMSIS/DSP/Source/FilteringFunctions/*.c \
kws/CMSIS/DSP/Source/MatrixFunctions/*.c \
kws/CMSIS/DSP/Source/StatisticsFunctions/*.c \
kws/CMSIS/DSP/Source/SupportFunctions/*.c \
kws/CMSIS/DSP/Source/TransformFunctions/*.c \
kws/CMSIS/DSP/Source/TransformFunctions/*.S

# Static link of every object produced above.
arm-linux-gnueabihf-g++ *.o -static -O3 $INCLUDES $DEFS -Wno-narrowing -fpermissive -lm -o speex_kws

rm -f *.o
复制代码
编译
测试
程序导入到开发板
- scp speex_kws root@169.254.79.46:/root/
复制代码
运行
- chmod +x ./speex_kws
- ./speex_kws kws_test.wav
复制代码
打印如下
可以看到识别全部正确。得益于T153 CPU强大的性能,算法处理和识别耗时非常短,在实时应用场景中使用也是毫无压力的。
0
|
2026-3-18 12:10:49
评论
淘帖0
|
|
|
|