
How can dynamic gesture recognition be implemented on the R329 AIPU and deployed to run on real hardware?

Replies (1)

杨艳

2021-12-29 10:45:41
1. Model and Algorithm

We use TSM, the algorithm open-sourced by MIT. The paper's authors perform temporal modeling by shifting part of the feature channels along the time dimension, which adds no extra parameters or compute, and they ported it to a range of phones to compare against other algorithms. Their code also reaches real-time on a Jetson Nano (without using CUDA) and has even been ported to an FPGA; thoroughly engineering-minded work.
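For intuition, here is a minimal NumPy sketch of the online (streaming) form of the temporal shift; the function name, cache layout, and the 1/8 shift fraction are illustrative assumptions, not the paper's exact code:

import numpy as np

def temporal_shift(x, prev_cache, shift_div=8):
    # x: current feature map, shape (1, H, W, C)
    # prev_cache: the first C // shift_div channels saved from the previous frame
    c = x.shape[-1] // shift_div
    next_cache = x[..., :c].copy()  # hand these channels to frame t+1
    shifted = np.concatenate([prev_cache, x[..., c:]], axis=-1)
    return shifted, next_cache      # same shape out as in; no new parameters

The 10 extra model inputs and outputs described in section 3 below are exactly these per-layer shift caches.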






2. Results

The backbone is MobileNetV2, yet the model only runs at 18 FPS on the AIPU. That seemed wrong at first; watching the live stream explained it: the first-generation AIPU handles grouped convolutions poorly. Fair enough. I also tried running the model on the R329's CPU, which manages only 3 FPS. For reference, on a laptop i7 CPU the TensorFlow pb model reaches about 65 FPS and the ONNX version about 120 FPS.






3. Porting to the AIPU






3.1 Model Quantization






To guard against unknown errors, I rewrote the author's PyTorch code in TensorFlow; I will open-source it later.


Because temporal modeling is involved, the model takes 11 input tensors: 1 image tensor plus 10 shift-cache tensors. It likewise produces 11 output tensors: 1 classification tensor plus 10 shift-cache tensors for the next frame.
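Schematically, inference becomes a stateful loop in which the 10 cache outputs of frame t are fed back as the cache inputs of frame t+1. A hypothetical Python sketch, where model and video_frames are placeholders and the shapes come from the cfg below:

import numpy as np

cache_shapes = [(1, 56, 56, 3), (1, 28, 28, 4), (1, 28, 28, 4),
                (1, 14, 14, 8), (1, 14, 14, 8), (1, 14, 14, 8),
                (1, 14, 14, 12), (1, 14, 14, 12), (1, 7, 7, 20), (1, 7, 7, 20)]
caches = [np.zeros(s, np.float32) for s in cache_shapes]  # zero state at start

for frame in video_frames():            # each frame: (1, 224, 224, 3)
    out = model([frame] + caches)       # 11 outputs: class scores + new caches
    scores, caches = out[0], list(out[1:])
    pred = int(np.argmax(scores))       # gesture class for this frame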


The cfg file is as follows:
[Common]
mode = build
use_aqt = True

[Parser]
model_name = mobilenet_v2
model_type = tensorflow
detection_postprocess =
model_domain = image_classification
output = fully_connected/BiasAdd,Slice,Slice_2,Slice_4,Slice_6,Slice_8,Slice_10,Slice_12,Slice_14,Slice_16,Slice_18
input_model = ./input/model3.pb
input = x_input,x_input0,x_input1,x_input2,x_input3,x_input4,x_input5,x_input6,x_input7,x_input8,x_input9
input_shape = [1, 224, 224, 3],[1, 56, 56, 3],[1, 28, 28, 4],[1, 28, 28, 4],[1, 14, 14, 8],[1, 14, 14, 8],[1, 14, 14, 8],[1, 14, 14, 12],[1, 14, 14, 12],[1, 7, 7, 20],[1, 7, 7, 20]
output_dir = ./

[AutoQuantizationTool]
quantize_method = SYMMETRIC
quant_precision = int8
ops_per_channel = DepthwiseConv
reverse_rgb = False
label_id_offset =
dataset_name =
detection_postprocess =
anchor_generator =
ts_max_file = ./input/max_dict2.npy
ts_min_file = ./input/min_dict2.npy

[GBuilder]
outputs = aipu_mobilenet_v2Z2_1104.bin
target = Z1_0701
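Reading the cfg top to bottom: [Parser] ingests the frozen pb with its 11 inputs and 11 outputs, [AutoQuantizationTool] performs symmetric int8 quantization driven by the externally supplied min/max files, and [GBuilder] emits aipu_mobilenet_v2Z2_1104.bin, the executable that the runtime loads on the board.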










Because the model has multiple inputs and outputs, the toolchain's automatic quantization over a training set cannot be used directly. Instead, the minimum and maximum of every node must be computed by hand and saved as max and min dict files, in the format the technical manual specifies.
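A minimal sketch of that calibration step, assuming a hypothetical run_float_model() that returns a {node_name: tensor} dict per sample and a placeholder calibration_samples() generator; the .npy paths match ts_max_file / ts_min_file in the cfg:

import numpy as np

max_dict, min_dict = {}, {}
for frame, caches in calibration_samples():       # placeholder data source
    tensors = run_float_model(frame, caches)      # {node_name: np.ndarray}
    for name, t in tensors.items():
        max_dict[name] = max(max_dict.get(name, float('-inf')), float(t.max()))
        min_dict[name] = min(min_dict.get(name, float('inf')), float(t.min()))

# np.save pickles the dicts; they load back with np.load(..., allow_pickle=True)
np.save('./input/max_dict2.npy', max_dict)
np.save('./input/min_dict2.npy', min_dict)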






3.2 Model Inference

The raw outputs need some post-processing (temporal smoothing and label debouncing) before they are usable; the inference and post-processing code follows.














/**
 * @file main.cpp
 * @brief
 *
 * Copyright (c) 2021 Sipeed team
 * ********************************
 * Modify by @zhai
 * *******************************
 */
extern "C" {
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "fbviewer.h"
#include "label.h"
#include <signal.h>
#include <sys/time.h>
}

#include "standard_api.h"
#include <iostream>
#include <list>
#include <vector>
#include <string>
#include "opencv2/opencv.hpp"
using namespace cv;

#define DBG_LINE() printf("###L%d\r\n", __LINE__)
int label_oft = 0;

typedef struct {
    int index;
    int8_t val;
} int8_data_t;

typedef struct {
    int index;
    uint8_t val;
} uint8_data_t;

// ring of the last few 27-class score vectors, summed for smoothing
std::list<int8_t*> listbuffs;

int8_t flag_frame_num = 0;

// recent label predictions, used to debounce the displayed gesture
std::vector<int> history;

// return the maximum of a[0..n-1] and write its index to *pMaxPos
int FindMax(int8_t a[], int n, int *pMaxPos)
{
    int i;
    int8_t max;
    max = a[0];
    *pMaxPos = 0;

    for (i = 1; i < n; i++)
    {
        if (a[i] > max)
        {
            max = a[i];
            *pMaxPos = i;
        }
    }
    return max;
}
cv::VideoCapture capture(0);

int init_cam(void)
{
    int x, y;
    getCurrentRes(&x, &y);
    printf("LCD width is %d, height is %d\n", x, y);

    if (!capture.isOpened())
    {
        std::cout << "video not open." << std::endl;
        return 1;
    }
    //get default video fps, set fps to 30fps
    double rate = capture.get(CAP_PROP_FPS);
    printf("rate is %lf\n", rate);
    capture.set(CAP_PROP_FPS, 30);
    rate = capture.get(CAP_PROP_FPS);
    printf("rate is %lf\n", rate);
    //get default video frame info
    double frame_width = capture.get(CAP_PROP_FRAME_WIDTH);
    double frame_height = capture.get(CAP_PROP_FRAME_HEIGHT);
    printf("frame_width is %lf, frame_height is %lf\n", frame_width, frame_height);
    //set video frame size to QVGA (then we crop to 224x224)
    frame_width = 320;
    frame_height = 240;
    if (!capture.set(CAP_PROP_FRAME_WIDTH, frame_width))
    {
        printf("set width failed\n");
        return 2;
    }
    if (!capture.set(CAP_PROP_FRAME_HEIGHT, frame_height))
    {
        printf("set height failed\n");
        return 3;
    }
    return 0;
}
int init_graph(char* file_model, aipu_ctx_handle_t** ctx, aipu_graph_desc_t* gdesc, aipu_buffer_alloc_info_t* info)
{
    const char* status_msg = NULL;
    aipu_status_t status = AIPU_STATUS_SUCCESS;

    //Step1: init ctx handle
    status = AIPU_init_ctx(ctx);
    if (status != AIPU_STATUS_SUCCESS) {
        AIPU_get_status_msg(status, &status_msg);
        printf("[DEMO ERROR] AIPU_init_ctx: %s\n", status_msg);
        return -1;
    }

    //Step2: load graph
    status = AIPU_load_graph_helper(*ctx, file_model, gdesc);
    if (status != AIPU_STATUS_SUCCESS) {
        AIPU_get_status_msg(status, &status_msg);
        printf("[DEMO ERROR] AIPU_load_graph_helper: %s\n", status_msg);
        return -2;
    }
    printf("[DEMO INFO] AIPU load graph successfully.\n");

    //Step3: alloc tensor buffers
    status = AIPU_alloc_tensor_buffers(*ctx, gdesc, info);
    if (status != AIPU_STATUS_SUCCESS) {
        AIPU_get_status_msg(status, &status_msg);
        printf("[DEMO ERROR] AIPU_alloc_tensor_buffers: %s\n", status_msg);
        return -3;
    }

    return 0;
}
int cap_img(Mat* lcd_frame, Mat* ai_frame)
{
    Rect roi(40, 0, 240/4*3, 240);  //crop 16:9 -> 4:3
    Rect input_roi(8, 8, 224, 224);
    Size dsize = Size(240, 240);
    if (!capture.read(*lcd_frame))
    {
        printf("no video frame\r\n");
        return -1;
    }
    *lcd_frame = (*lcd_frame)(roi).clone();
    rotate(*lcd_frame, *lcd_frame, ROTATE_180);
    resize(*lcd_frame, *lcd_frame, dsize);
    cvtColor(*lcd_frame, *lcd_frame, COLOR_BGR2RGB);
    *ai_frame = (*lcd_frame)(input_roi).clone();
    //*ai_frame = (*lcd_frame)(input_roi).clone() + Scalar(-123, -117, -104);
    // shift pixels from [0, 255] into the signed int8 range for the quantized model
    ai_frame->convertTo(*ai_frame, CV_16SC3);
    add(Scalar(-127, -127, -127), *ai_frame, *ai_frame);
    ai_frame->convertTo(*ai_frame, CV_8SC3);

    return 0;
}
int infer_img(Mat* ai_frame, Mat* x0, Mat* x1, Mat* x2, Mat* x3, Mat* x4, Mat* x5,
              Mat* x6, Mat* x7, Mat* x8, Mat* x9, aipu_ctx_handle_t** ctx,
              aipu_graph_desc_t* gdesc, aipu_buffer_alloc_info_t* info,
              int signed_flag, int* label_idx, int* label_prob)
{
    uint32_t job_id = 0;
    const char* status_msg = NULL;
    int32_t time_out = -1;
    bool finish_job_successfully = true;
    aipu_status_t status = AIPU_STATUS_SUCCESS;
    int ret = 0;

    // input 0 is the image; inputs 1..10 are the shift caches from the previous frame
    memcpy(info->inputs.tensors[0].va, ai_frame->data, info->inputs.tensors[0].size);
    memcpy(info->inputs.tensors[1].va, x0->data, info->inputs.tensors[1].size);
    memcpy(info->inputs.tensors[2].va, x1->data, info->inputs.tensors[2].size);
    memcpy(info->inputs.tensors[3].va, x2->data, info->inputs.tensors[3].size);
    memcpy(info->inputs.tensors[4].va, x3->data, info->inputs.tensors[4].size);
    memcpy(info->inputs.tensors[5].va, x4->data, info->inputs.tensors[5].size);
    memcpy(info->inputs.tensors[6].va, x5->data, info->inputs.tensors[6].size);
    memcpy(info->inputs.tensors[7].va, x6->data, info->inputs.tensors[7].size);
    memcpy(info->inputs.tensors[8].va, x7->data, info->inputs.tensors[8].size);
    memcpy(info->inputs.tensors[9].va, x8->data, info->inputs.tensors[9].size);
    memcpy(info->inputs.tensors[10].va, x9->data, info->inputs.tensors[10].size);

    flag_frame_num = 1;

    status = AIPU_create_job(*ctx, gdesc, info->handle, &job_id);
    if (status != AIPU_STATUS_SUCCESS) {
        AIPU_get_status_msg(status, &status_msg);
        printf("[DEMO ERROR] AIPU_create_job: %s\n", status_msg);
        ret = -1;
    }
    status = AIPU_finish_job(*ctx, job_id, time_out);
    if (status != AIPU_STATUS_SUCCESS) {
        AIPU_get_status_msg(status, &status_msg);
        printf("[DEMO ERROR] AIPU_finish_job: %s\n", status_msg);
        finish_job_successfully = false;
    } else {
        finish_job_successfully = true;
    }

    if (finish_job_successfully) {
        int8_t *result = (int8_t *)info->outputs.tensors[0].va;

        // keep the last 6 score vectors (27 gesture classes)...
        int8_t *buff = (int8_t *)malloc(27 * sizeof(int8_t));
        memcpy(buff, result, sizeof(int8_t) * 27);
        if (listbuffs.size() > 5) { free(listbuffs.front()); listbuffs.pop_front(); }
        listbuffs.push_back(buff);

        // ...and sum them so a single noisy frame cannot flip the label
        int8_t *buffsum = (int8_t *)calloc(27, sizeof(int8_t));
        int k = 0;
        std::list<int8_t*>::iterator iter;
        for (iter = listbuffs.begin(); iter != listbuffs.end(); iter++) {
            for (k = 0; k < 27; k++) { buffsum[k] = buffsum[k] + (*iter)[k]; }
        }

        // outputs 1..10 are the new shift caches; feed them back for the next frame
        memcpy(x0->data, info->outputs.tensors[1].va, info->outputs.tensors[1].size);
        memcpy(x1->data, info->outputs.tensors[2].va, info->outputs.tensors[2].size);
        memcpy(x2->data, info->outputs.tensors[3].va, info->outputs.tensors[3].size);
        memcpy(x3->data, info->outputs.tensors[4].va, info->outputs.tensors[4].size);
        memcpy(x4->data, info->outputs.tensors[5].va, info->outputs.tensors[5].size);
        memcpy(x5->data, info->outputs.tensors[6].va, info->outputs.tensors[6].size);
        memcpy(x6->data, info->outputs.tensors[7].va, info->outputs.tensors[7].size);
        memcpy(x7->data, info->outputs.tensors[8].va, info->outputs.tensors[8].size);
        memcpy(x8->data, info->outputs.tensors[9].va, info->outputs.tensors[9].size);
        memcpy(x9->data, info->outputs.tensors[10].va, info->outputs.tensors[10].size);

        int idx;
        FindMax(buffsum, 27, &idx);
        printf("idx:%d\n", idx);

        // classes that misfire frequently fall back to the previous label
        int errors_fre[] = {7, 8, 21, 22, 3, 10, 11, 12, 13};
        int i;
        for (i = 0; i < 9; i++) {
            if (idx == errors_fre[i]) { idx = history.back(); }
        }

        if (idx == 0) { idx = history.back(); }

        // only accept a new label once the previous one has been stable for a few frames
        // (the size check guards the history[size-3] access on the first calls)
        if (history.size() >= 3 && idx != history.back()) {
            if (history[history.size() - 3] != history.back() ||
                history[history.size() - 2] != history.back()) { idx = history.back(); }
        }

        history.push_back(idx);
        *label_idx = history.back();
        if (history.size() > 20) { history.erase(history.begin()); }
        free(buffsum);
    }

    status = AIPU_clean_job(*ctx, job_id);
    if (status != AIPU_STATUS_SUCCESS) {
        AIPU_get_status_msg(status, &status_msg);
        printf("[TEST ERROR] AIPU_clean_job: %s\n", status_msg);
        ret = -2;
    }
    return ret;
}
float cal_fps(struct timeval start, struct timeval end)
{
    struct timeval interval;
    if (end.tv_usec >= start.tv_usec) {
        interval.tv_usec = end.tv_usec - start.tv_usec;
        interval.tv_sec = end.tv_sec - start.tv_sec;
    } else {
        interval.tv_usec = 1000000 + end.tv_usec - start.tv_usec;
        interval.tv_sec = end.tv_sec - 1 - start.tv_sec;
    }
    // include whole seconds so intervals over 1 s do not inflate the FPS
    float fps = 1000000.0f / (interval.tv_sec * 1000000.0f + interval.tv_usec);
    return fps;
}

volatile int exit_flag = 0;
void my_handler(int s) {
    printf("Caught signal %d\n", s);
    exit_flag = 1;
    return;
}
int main(int argc, char *argv[])
{
    int ret = 0;
    cv::Mat lcd_frame;
    cv::Mat ai_frame;
    int label_idx = 0, label_prob = 0;
    struct timeval start, end;

    int flag_frame = 0;
    float fps = 0;

    // the 10 shift caches: zero-initialized int8 Mats whose shapes match the
    // cfg's input_shape entries
    cv::Mat x0 = cv::Mat::zeros(56, 56, CV_8SC(3));
    cv::Mat x1 = cv::Mat::zeros(28, 28, CV_8SC(4));
    cv::Mat x2 = cv::Mat::zeros(28, 28, CV_8SC(4));
    cv::Mat x3 = cv::Mat::zeros(14, 14, CV_8SC(8));
    cv::Mat x4 = cv::Mat::zeros(14, 14, CV_8SC(8));
    cv::Mat x5 = cv::Mat::zeros(14, 14, CV_8SC(8));
    cv::Mat x6 = cv::Mat::zeros(14, 14, CV_8SC(12));
    cv::Mat x7 = cv::Mat::zeros(14, 14, CV_8SC(12));
    cv::Mat x8 = cv::Mat::zeros(7, 7, CV_8SC(20));
    cv::Mat x9 = cv::Mat::zeros(7, 7, CV_8SC(20));

    signal(SIGINT, my_handler);

    // seed the label history so history.back() is valid on the first frame
    history.push_back(2);
    history.push_back(2);

    printf("Zhouyi Cam test program: \r\n");
    printf("Usage: \r\n");
    printf("    ./zhouyi aipu.bin signed [label_oft]\r\n");
    printf("    signed=0, uint8 output; =1, int8 output\r\n");
    printf("    real_label_idx = predict_idx-label_oft\r\n");
    printf("    NOTE: default cal with 224x224\r\n");

    aipu_ctx_handle_t *ctx = NULL;
    aipu_status_t status = AIPU_STATUS_SUCCESS;
    const char* status_msg = NULL;
    aipu_graph_desc_t gdesc;
    aipu_buffer_alloc_info_t info;

    //Step 0: parse input argv
    if (argc < 3) {
        printf("argc=%d error\r\n", argc);
        return -1;
    }
    if (argc > 3) label_oft = atoi(argv[3]);
    char* file_model = argv[1];
    int signed_flag = 1;

    //Step 1: set USB camera
    ret = init_cam(); DBG_LINE();
    if (ret != 0) {
        printf("[DEMO ERROR] init_cam err: %d\n", ret);
        goto out;
    }

    //Step 2: init model graph
    ret = init_graph(file_model, &ctx, &gdesc, &info); DBG_LINE();
    if (ret == -1) goto out;
    else if (ret == -2) goto deinit_ctx;
    else if (ret == -3) goto unload_graph;

    //MAIN LOOP
    while (!exit_flag)
    {
        //1. cap cam img
        if (cap_img(&lcd_frame, &ai_frame) != 0) {
            break;
        }
        //2. infer every second frame only, get label
        gettimeofday(&start, NULL);
        if (flag_frame % 2 == 0) {
            ret = infer_img(&ai_frame, &x0, &x1, &x2, &x3, &x4, &x5, &x6, &x7, &x8, &x9,
                            &ctx, &gdesc, &info, signed_flag, &label_idx, &label_prob);
            if (ret != 0) goto free_tensor_buffers;
            gettimeofday(&end, NULL);
            fps = cal_fps(start, end);
        }
        flag_frame += 1;
        //3. draw label and fps on the lcd
        flip(lcd_frame, lcd_frame, 1);
        putText(lcd_frame, labels[label_idx], Point(0, 224), cv::FONT_HERSHEY_PLAIN, 1, Scalar(255, 0, 0), 2);
        char fps_str[16];
        sprintf(fps_str, "%.1ffps", fps);
        putText(lcd_frame, fps_str, Point(0, 16), cv::FONT_HERSHEY_PLAIN, 1, Scalar(255, 0, 0), 2);
        fb_display(lcd_frame.data, 0, 240, 240, 0, 0, 0, 0);
    }

free_tensor_buffers:
    status = AIPU_free_tensor_buffers(ctx, info.handle);
    if (status != AIPU_STATUS_SUCCESS) {
        AIPU_get_status_msg(status, &status_msg);
        printf("[DEMO ERROR] AIPU_free_tensor_buffers: %s\n", status_msg);
        ret = -1;
    }
unload_graph:
    status = AIPU_unload_graph(ctx, &gdesc);
    if (status != AIPU_STATUS_SUCCESS) {
        AIPU_get_status_msg(status, &status_msg);
        printf("[DEMO ERROR] AIPU_unload_graph: %s\n", status_msg);
        ret = -1;
    }
deinit_ctx:
    status = AIPU_deinit_ctx(ctx);
    if (status != AIPU_STATUS_SUCCESS) {
        AIPU_get_status_msg(status, &status_msg);
        printf("[DEMO ERROR] AIPU_deinit_ctx: %s\n", status_msg);
        ret = -1;
    }

out:
    return ret;
}










4. Code

https://github.com/ZhaiFengYun/R329-AIPU-gesture-recognition