Deploy HiFi5 voice algorithm on Allwinner R128

aleksib

Deploy HiFi5 voice algorithm on Allwinner R128 [Copy link]

Deploying the algorithm involves the following five steps:

Create a DSP algorithm component directory and write code
Recording on DSP
Use algorithm acceleration library to accelerate algorithms
Inter-core communication
DUMP data to PC

Create a DSP algorithm component directory

Create and enter the directory:

mkdir -p lichee/rtos-components/thirdparty/my_dsp_asr/src
mkdir -p lichee/rtos-components/thirdparty/my_dsp_asr/inc
cd lichee/rtos-components/thirdparty/my_dsp_asr/

Write a Kconfig file:

# Menu entry for the demo DSP ASR component.
menu "my dsp asr"

# Build switch for the component; only offered when ARCH_DSP is set and
# disabled by default.
config COMPONENTS_MY_DSP_ASR
    bool "my dsp asr"
    depends on ARCH_DSP
    default n
    help
        to do

endmenu

After modifying Kconfig, it is recommended to run menuconfig again.

Append to lichee/rtos-components/thirdparty/Kconfig:

source "components/common/thirdparty/my_dsp_asr/Kconfig"

Write Makefile:

obj-y += src/my_dsp_asr.o
# Header search path for this component's own inc/ directory (the component
# lives under components/common/thirdparty/my_dsp_asr, same as its Kconfig).
CFLAGS += -Icomponents/common/thirdparty/my_dsp_asr/inc/

Append to lichee/rtos-components/thirdparty/Makefile:

obj-$(CONFIG_COMPONENTS_MY_DSP_ASR) += my_dsp_asr/

Writing basic code

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>
#include <string.h>
#include <console.h>
#include <FreeRTOS.h>
#include <task.h>

/**
 * Demo task body: counts down from the value passed in @arg, printing the
 * remaining count every 500 ms, then deletes itself.
 *
 * @param arg  Loop count smuggled through the task parameter as an integer
 *             cast to a pointer (see the xTaskCreate call in cmd_my_dsp_asr).
 */
static void my_dsp_asr_thread(void *arg)
{
    size_t loop = (size_t)arg;

    printf("%s enter\n", __func__);
    while (loop--) {
        /* size_t does not match %u; cast explicitly so the argument type
         * agrees with the conversion specifier. */
        printf("%s %u\n", __func__, (unsigned int)loop);
        vTaskDelay(500 / portTICK_PERIOD_MS);
    }
    printf("%s exit\n", __func__);

    /* A FreeRTOS task must not return; delete the calling task instead. */
    vTaskDelete(NULL);
}

/* Parameters of the demo task. File-local so that these very generic names
 * cannot clash with symbols from other components at link time. */
static const char *const thread_name = "my_dsp_asr_thread";
static const size_t stack_size = 0x4000;
static const size_t thread_priority = 1;

/**
 * Console command "my_dsp_asr": starts my_dsp_asr_thread with a loop count
 * of 10.
 *
 * @param argc  Unused.
 * @param argv  Unused.
 * @return 0 when the task was created, -1 when xTaskCreate failed.
 */
int cmd_my_dsp_asr(int argc, char *argv[])
{
    const size_t loop = 10;
    TaskHandle_t handle = NULL;
    int ret = 0;

    (void)argc;
    (void)argv;

    printf("%s enter\n", __func__);
    /* The loop count travels to the task as a pointer-sized integer. */
    if (xTaskCreate(my_dsp_asr_thread, thread_name, stack_size, (void *)loop, thread_priority, &handle) != pdPASS) {
        printf("xTaskCreate %s failed!\n", thread_name);
        ret = -1; /* report the failure instead of claiming success */
    }
    printf("%s exit\n", __func__);

    return ret;
}
FINSH_FUNCTION_EXPORT_CMD(cmd_my_dsp_asr, my_dsp_asr, my dsp asr);

./build.sh menuconfig, select the following configuration:

CONFIG_COMPONENTS_MY_DSP_ASR

(The dependent components will be automatically selected according to the select field of Kconfig)

Check whether it is compiled into the firmware:

grep -r cmd_my_dsp_asr ./out/ --include=*.bin

The output should look like this:

lichee/dsp$ grep -r cmd_my_dsp_asr ./out/ --include=*.bin
Binary file ./out/r128s3/evb1/r128s2_dsp0_evb1.bin matches
Binary file ./out/r128s3/evb1/r128s2_dsp0_evb1_raw.bin matches
Binary file ./out/r128s3/evb1/r128s2_dsp0_evb1_xcc.bin matches

Flash the firmware, then enter the following in the serial terminal:

rpccli dsp my_dsp_asr

You can add automatic startup later:

diff --git a/arch/sun20iw2/init-sun20iw2.c b/arch/sun20iw2/init-sun20iw2.c
index cfb2d45d..9b5c2a5d 100644
--- a/arch/sun20iw2/init-sun20iw2.c
+++ b/arch/sun20iw2/init-sun20iw2.c
@@ -160,5 +160,10 @@ void app_init(void)
        rpdata_ctrl_init();
 #endif

+#ifdef CONFIG_COMPONENTS_MY_DSP_ASR
+       int cmd_my_dsp_asr(int argc, char *argv[]);
+       cmd_my_dsp_asr(0, NULL);
+#endif
+
 }

Recording on DSP

Add in Kconfig:

     bool "my dsp asr"
     depends on ARCH_DSP
+    select COMPONENTS_AW_AUDIO_SYSTEM
     default n
     help

Add in Makefile

#audio system
CFLAGS += -Icomponents/common/aw/AudioSystem/include/

Reference Code:

#include "AudioRecord.h"
#define MS_PER_FRAME    (10)
#define RECORD_RATE     (16000)
#define RECORD_CHANNELS (3)
#define RECORD_NAME "capture"
static const size_t record_time_ms = 10 * 1000;
static const uint32_t rate = RECORD_RATE;
static const uint8_t channels = RECORD_CHANNELS;
static const uint8_t bitwidth = 16;
/* Holds one MS_PER_FRAME-long frame of 16-bit samples for all channels. */
static int16_t record_buffer[RECORD_RATE * MS_PER_FRAME * RECORD_CHANNELS / 1000];

/**
 * Recording task: opens the RECORD_NAME stream, reads one frame at a time
 * into record_buffer until record_time_ms has elapsed (counted in frames),
 * then releases the stream and deletes itself.
 *
 * Any failure ends the recording early; the stream is still destroyed if it
 * was created.
 *
 * @param arg  Unused.
 */
static void my_record_thread(void *arg)
{
    tAudioRecord *pAudioRecord = NULL;
    int ret;
    size_t time_ms = 0;

    (void)arg;

    printf("%s enter\n", __func__);
    pAudioRecord = AudioRecordCreate(RECORD_NAME);
    if (!pAudioRecord) {
        printf("%s:%u error!\n", __func__, __LINE__);
        goto out; /* nothing to set up, read from or destroy */
    }

    ret = AudioRecordSetup(pAudioRecord, rate, channels, bitwidth);
    if (ret) {
        printf("%s:%u error!\n", __func__, __LINE__);
        goto destroy;
    }

    while (time_ms < record_time_ms) {
        ret = AudioRecordRead(pAudioRecord, record_buffer, sizeof(record_buffer));
        if (ret < 0) {
            printf("%s:%u error!\n", __func__, __LINE__);
            break; /* stop instead of processing a frame that was not read */
        }
        time_ms += MS_PER_FRAME;
        /* time_ms is a size_t; cast so the argument matches %u. */
        printf("%ums: read %d\n", (unsigned int)time_ms, ret);
    }

    AudioRecordStop(pAudioRecord);
destroy:
    AudioRecordDestroy(pAudioRecord);
out:
    printf("%s exit\n", __func__);

    vTaskDelete(NULL);
}

Using Algorithm Acceleration Library

This section is just a code demonstration. Customers with HiFi5 authorization can obtain the source code packages of NatureDSP_Signal and xa_nnlib_api from Cadence, which include the API documentation:

libxa_nnlib/doc/HiFi5-NNLib-ProgrammersGuide-API.pdf
hifi5_library/doc/NatureDSP_Signal_Library_Reference_HiFi5.pdf

Add in Kconfig:

     bool "my dsp asr"
     depends on ARCH_DSP
     select COMPONENTS_AW_AUDIO_SYSTEM
+    select COMPONENTS_XTENSA_HIFI5_NNLIB_LIBRARY
+    select COMPONENTS_XTENSA_HIFI5_VFPU_LIBRARY
     default n
     help

Add in Makefile

CFLAGS +=  -I components/thirdparty/xtensa/hifi5_nn_v170_library/include
CFLAGS +=  -I components/thirdparty/xtensa/hifi5_vfpu_v200_library/include

(The action of linking the acceleration library has been added in lichee/dsp/Makefile, so there is no need to add it again)

FFT reference code:

#include <math.h>
#include "NatureDSP_Signal.h"
#include "xa_nnlib_api.h"

#ifndef PI
#define PI             (3.141592653f)
#endif

/* Single-precision complex value. Pointers to this struct are cast to the
 * library's complex_float when calling fft_cplxf_ie, so the field order and
 * types must not change. */
struct cplxf_t {
    float r; /* real part */
    float i; /* imaginary part */
};

/**
 * Extract one channel from an interleaved PCM buffer into a complex array.
 *
 * @param output    Receives N complex values; the imaginary part is set to 0.
 * @param input     Interleaved 16-bit samples, chs samples per frame.
 * @param N         Number of frames to convert.
 * @param chs       Number of interleaved channels in @input.
 * @param ch_index  Channel to extract.
 */
static inline void make_src_cplxf_from_record_buffer(struct cplxf_t *output, int16_t *input, int N, uint8_t chs, uint8_t ch_index)
{
    int frame = 0;

    while (frame < N) {
        /* First sample of the current interleaved frame. */
        const int16_t *pcm = &input[frame * chs];

        output[frame].r = pcm[ch_index];
        output[frame].i = 0.0f;
        frame++;
    }
}

// N*3/4 *twdstep
static inline void make_twd_cplxf(struct cplxf_t *output, const int N, const int twdstep)
{
    int n, m;
    for (n = 0; n < (twdstep * N) / 4; n++) {
        for (m = 0; m < 3; m++) {
            float phi = 2 * PI * (m + 1) * n / (twdstep * N);
            output[n * 3 + m].r = cosf(phi);
            output[n * 3 + m].i = sinf(phi);
        }
    }
}

/**
 * Find the element with the largest squared magnitude (r*r + i*i).
 *
 * @param input_cplxf  Array of N complex values.
 * @param N            Number of elements to scan.
 * @return Index of the first element whose squared magnitude is the largest;
 *         0 when no element has a squared magnitude above zero (including
 *         N <= 0).
 */
int get_max(const struct cplxf_t *input_cplxf, int N)
{
    float max = 0.0f;
    int index = 0; /* an index is an integer; it was previously kept in a float */
    int i = 0;

    for (i = 0; i < N; i++) {
        float cur = input_cplxf[i].r * input_cplxf[i].r + input_cplxf[i].i * input_cplxf[i].i;
        if (cur > max) {
            max = cur;
            index = i;
        }
    }

    return index;
}

#define FFT_SIZE    (1024)
/* Scratch copy of the input handed to the FFT routine. */
static struct cplxf_t g_tmp_cplxf[FFT_SIZE];

/**
 * Run a complex FFT of length FFT_SIZE on one block of input.
 *
 * The input is copied into g_tmp_cplxf first and the copy is passed to
 * fft_cplxf_ie, so @input_cplxf itself is not handed to the library
 * (NOTE(review): presumably because the routine may overwrite its input;
 * confirm against the NatureDSP reference).
 *
 * @param output_cplxf  Receives the FFT result.
 * @param input_cplxf   Input block; FFT_SIZE elements are read from it.
 * @param twd_cplxf     Twiddle table passed through to fft_cplxf_ie.
 * @param N             Length of the block; must equal FFT_SIZE.
 * @return 0 on success, -1 when N is not FFT_SIZE.
 */
static int record_data_handler(struct cplxf_t *output_cplxf, const struct cplxf_t *input_cplxf, const struct cplxf_t *twd_cplxf, int N)
{
    if (N == FFT_SIZE) {
        memcpy(g_tmp_cplxf, input_cplxf, sizeof(g_tmp_cplxf));
        fft_cplxf_ie((complex_float *)output_cplxf,
                     (complex_float *)g_tmp_cplxf,
                     (const complex_float *)twd_cplxf,
                     1,
                     FFT_SIZE);
        return 0;
    }

    printf("%s incorrect data length: %d\n", __func__, N);
    return -1;
}

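Add the following to the recording code. Note that, as written, N = rate * MS_PER_FRAME / 1000 = 160, while record_data_handler() only accepts N == FFT_SIZE (1024), so every frame is rejected and max stays at -1. To actually run the FFT, buffer (or zero-pad) the samples up to FFT_SIZE and use FFT_SIZE as N for both make_twd_cplxf() and record_data_handler():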

     ret = AudioRecordSetup(pAudioRecord, rate, channels, bitwidth);
     if (ret) {
         printf("%s:%u error!\n", __func__, __LINE__);
         // TODO
     }

+    static struct cplxf_t g_input_cplxf[FFT_SIZE];
+    static struct cplxf_t g_output_cplxf[FFT_SIZE];
+    static struct cplxf_t g_twd_cplxf[FFT_SIZE];
+    int N = rate * MS_PER_FRAME / 1000;
+    int twdstep = 1;
+    int max_index = -1;
+    make_twd_cplxf(g_twd_cplxf, N, twdstep);

     while (time_ms < record_time_ms) {
         ret = AudioRecordRead(pAudioRecord, record_buffer, sizeof(record_buffer));
         if (ret < 0) {
             printf("%s:%u error!\n", __func__, __LINE__);
             // TODO
         }
         time_ms += MS_PER_FRAME;
+        max_index = -1;
+        make_src_cplxf_from_record_buffer(g_input_cplxf, record_buffer, N, channels, 0);
+        if( !record_data_handler(g_output_cplxf, g_input_cplxf, g_twd_cplxf, N)) {
+            max_index = get_max(g_output_cplxf, N);
+        }
         printf("%ums: read %d, max: %d\n", time_ms, ret, max_index);
     }

Inter-core communication

The code is for reference only and does not contain actual business code

Add in Kconfig:

     bool "my dsp asr"
     depends on ARCH_DSP
     select COMPONENTS_AW_AUDIO_SYSTEM
+    select COMPONENTS_RPDATA
     select COMPONENTS_XTENSA_HIFI5_NNLIB_LIBRARY
     select COMPONENTS_XTENSA_HIFI5_VFPU_LIBRARY
     default n
     help

Add in Makefile

#rpdata
CFLAGS += -Icomponents/common/aw/rpdata/include/

Since inter-core communication requires the cooperation of the RV core, you also need to write a component on the RV side for control and data reception. It can be written the same way as the DSP algorithm component. Kconfig:

# Menu entry for the RV-side counterpart of the demo ASR component.
menu "my rv asr"

# Build switch for the component; only offered when ARCH_DSP is not set,
# selects COMPONENTS_RPDATA, and is disabled by default.
config COMPONENTS_MY_RV_ASR
    bool "my RV asr"
    depends on !ARCH_DSP
    select COMPONENTS_RPDATA
    default n
    help
        to do

endmenu

(Note that it is !ARCH_DSP)

Other modifications are similar to those in the first section and are not repeated here. The menuconfig command on the RV side is mrtos_menuconfig, and commands on the RV side are run directly, without the "rpccli dsp" prefix.

Generic code:

#include <rpdata.h>

/* Runtime state of one rpdata channel. Fields marked "send" are only filled
 * in by rpd_send_ch_init, fields marked "recv" only by rpd_recv_ch_init. */
struct my_rpd_t {
    rpdata_t *rpd; // both: channel handle from rpdata_create / rpdata_connect
    void *addr; // both: channel buffer returned by rpdata_buffer_addr
    size_t buf_len; // send: number of bytes copied and sent per rpd_send call
    void (*cb)(void *priv, void *data, unsigned int data_len); // recv: user callback invoked for each received message
    void *priv; // recv: opaque pointer passed back to cb
};

/* Parameters used to open a channel with rpd_send_ch_init or
 * rpd_recv_ch_init. */
struct my_rpd_cfg_t {
    int dir; // both: direction value passed to rpdata_create / rpdata_connect
    const char *type; // both: channel type string
    const char *name; // both: channel name string
    size_t buf_len; // send: buffer size requested from rpdata_create
    void (*cb)(void *priv, void *data, unsigned int data_len); // recv: callback to install on the channel
    void *priv; // recv: opaque pointer handed to cb
};

/**
 * Close a channel and reset its handle.
 *
 * Does nothing when the handle holds no channel (hdl->rpd is NULL). All
 * fields are cleared before the underlying rpdata object is destroyed.
 *
 * @param hdl  Channel handle to tear down.
 */
static void rpd_ch_deinit(struct my_rpd_t *hdl)
{
    rpdata_t *channel = hdl->rpd;

    if (channel == NULL)
        return;

    hdl->cb = NULL;
    hdl->priv = NULL;
    hdl->buf_len = 0;
    hdl->addr = NULL;
    hdl->rpd = NULL;

    rpdata_destroy(channel);
}

/**
 * Receive hook installed on every receiving channel: looks up the owning
 * my_rpd_t through the channel's private data and forwards the message to
 * the user callback, if one is set.
 *
 * @param rpd       Channel the data arrived on.
 * @param data      Received payload.
 * @param data_len  Payload length as reported by rpdata.
 * @return Always 0.
 */
static int rpd_recv_ch_callback(rpdata_t *rpd, void *data, unsigned int data_len)
{
    struct my_rpd_t *owner = (struct my_rpd_t *)rpdata_get_private_data(rpd);

    if (owner->cb != NULL) {
        owner->cb(owner->priv, data, data_len);
    }

    return 0;
}

/* Callback table handed to rpdata_set_recv_cb by rpd_recv_ch_init. */
static struct rpdata_cbs rpd_recv_cb = {
    .recv_cb = rpd_recv_ch_callback,
};

/**
 * Open the receiving end of a channel.
 *
 * Connects to the channel described by @cfg, fetches its buffer address,
 * stores the user callback and private pointer in @hdl, and installs
 * rpd_recv_cb on the channel.
 *
 * @param hdl  Handle to fill in.
 * @param cfg  Channel description; dir, type, name, cb and priv are used.
 * @return 0 on success, -1 when the connect or buffer lookup fails (the
 *         channel is released again in the latter case).
 */
static int rpd_recv_ch_init(struct my_rpd_t *hdl, struct my_rpd_cfg_t *cfg)
{
    printf("recv rpd dir:%d, type:%s, name:%s\n", cfg->dir, cfg->type, cfg->name);

    hdl->rpd = rpdata_connect(cfg->dir, cfg->type, cfg->name);
    if (hdl->rpd == NULL) {
        printf("rpdata_connect failed!\n");
        return -1;
    }

    hdl->addr = rpdata_buffer_addr(hdl->rpd);
    if (hdl->addr == NULL) {
        printf("rpdata_buffer_addr failed!\n");
        goto err_release;
    }

    hdl->cb = cfg->cb;
    hdl->priv = cfg->priv;

    /* The private data must be in place before the receive hook is set,
     * because the hook dereferences it. */
    rpdata_set_private_data(hdl->rpd, hdl);
    rpdata_set_recv_cb(hdl->rpd, &rpd_recv_cb);
    return 0;

err_release:
    rpd_ch_deinit(hdl);
    return -1;
}

/**
 * Open the sending end of a channel.
 *
 * Creates the channel described by @cfg with a buffer of cfg->buf_len bytes
 * and records the buffer address and length in @hdl.
 *
 * @param hdl  Handle to fill in.
 * @param cfg  Channel description; dir, type, name and buf_len are used.
 * @return 0 on success, -1 on failure. On failure hdl->rpd and hdl->addr are
 *         NULL and hdl->buf_len is 0.
 */
static int rpd_send_ch_init(struct my_rpd_t *hdl, struct my_rpd_cfg_t *cfg)
{
    /* cfg->buf_len is a size_t; cast so the argument matches %u. */
    printf("send rpd dir:%d, type:%s, name:%s, buf_len:%u\n", cfg->dir, cfg->type, cfg->name, (unsigned int)cfg->buf_len);

    /* Start from a defined state so a failed create does not leave stale
     * values behind in a caller-provided (possibly uninitialised) handle. */
    hdl->addr = NULL;
    hdl->buf_len = 0;

    hdl->rpd = rpdata_create(cfg->dir, cfg->type, cfg->name, cfg->buf_len);
    if (!hdl->rpd) {
        printf("rpdata_create failed!\n");
        goto err;
    }

    hdl->addr = rpdata_buffer_addr(hdl->rpd);
    if (!hdl->addr) {
        printf("rpdata_buffer_addr failed!\n");
        goto err;
    }

    hdl->buf_len = cfg->buf_len;

    return 0;
err:
    rpd_ch_deinit(hdl);
    return -1;
}

/**
 * Send one message on a sending channel.
 *
 * Copies hdl->buf_len bytes from @data into the channel buffer, waits for
 * the peer to connect, then sends the buffer.
 *
 * @param hdl   Channel opened with rpd_send_ch_init.
 * @param data  Source of at least hdl->buf_len bytes.
 * @return The result of rpdata_send, or -1 when the handle holds no open
 *         channel or @data is NULL.
 */
static int rpd_send(struct my_rpd_t *hdl, void *data)
{
    /* After rpd_ch_deinit the handle has rpd == NULL and addr == NULL;
     * refuse to copy into or send on such a channel. */
    if (!hdl || !hdl->rpd || !hdl->addr || !data)
        return -1;

    memcpy(hdl->addr, data, hdl->buf_len);

    rpdata_wait_connect(hdl->rpd);

    return rpdata_send(hdl->rpd, 0, hdl->buf_len);
}

// RV to DSP control channel
#define RPD_CTL_TYPE                ("RVtoDSPCtl")
#define RPD_CTL_NAME                ("RVtoDSPCtlCh")
#define RPD_CTL_SIZE                (4)

// DSP to RV data channel
#define RPD_DATA_TYPE               ("DSPtoRVData")
#define RPD_DATA_NAME               ("DSPtoRVDataCh")
#define RPD_DATA_SIZE               (64)

DSP side code:

/* Run flag of the DSP worker thread; overwritten by control messages. */
static int g_run = 0;

/**
 * Control-channel callback on the DSP side: stores the received int into
 * g_run.
 *
 * @param priv      Unused.
 * @param data      Received payload; its first sizeof(int) bytes are the
 *                  new run flag.
 * @param data_len  Payload length in bytes.
 */
static void my_rpd_dsp_recv_cb(void *priv, void *data, unsigned int data_len)
{
    (void)priv;

    /* data_len is unsigned, so print it with %u. */
    printf("%s recv:%u\n", __func__, data_len);

    /* Do not read past the end of a message shorter than the flag. */
    if (data == NULL || data_len < sizeof(g_run)) {
        printf("%s: short control message ignored\n", __func__);
        return;
    }

    memcpy(&g_run, data, sizeof(g_run));
}

static void my_rpd_thread(void *arg)
{
    struct my_rpd_t send_ch;
    struct my_rpd_cfg_t send_cfg = {
        .dir = 2,
        .type = RPD_DATA_TYPE,
        .name = RPD_DATA_NAME,
        .buf_len = RPD_DATA_SIZE,
    };
    struct my_rpd_t recv_ch;
    struct my_rpd_cfg_t recv_cfg = {
        .dir = 2,
        .type = RPD_CTL_TYPE,
        .name = RPD_CTL_NAME,
        .cb = my_rpd_dsp_recv_cb,
        .priv = NULL,
    };
    unsigned char data[RPD_DATA_SIZE];
    int i = 0;
    for (i = 0; i < RPD_DATA_SIZE; i++) {
        data[i] = '0' + 1;
    }

    g_run  = 1;

    printf("%s start\n", __func__);

    if (rpd_send_ch_init(&send_ch, &send_cfg)) {
        printf("%s:%u error!\n", __func__, __LINE__);
        // TODO
    }

    if (rpd_recv_ch_init(&recv_ch, &recv_cfg)) {
        printf("%s:%u error!\n", __func__, __LINE__);
        // TODO
    }

    while(g_run) {
        rpd_send(&send_ch, data);
        vTaskDelay(500 / portTICK_PERIOD_MS);
    }

    rpd_ch_deinit(&recv_ch);
    rpd_ch_deinit(&send_ch);
    printf("%s exit\n", __func__);

    vTaskDelete(NULL);
}

RV side code:

/* Run flag of the RV worker thread; its current value is sent to the DSP. */
static int g_run = 0;

/**
 * Data-channel callback on the RV side: only logs the size of each message.
 *
 * @param priv      Unused.
 * @param data      Received payload (not inspected).
 * @param data_len  Payload length as reported by rpdata.
 */
static void my_rpd_rv_recv_cb(void *priv, void *data, unsigned int data_len)
{
    (void)priv;
    (void)data;

    /* data_len is unsigned, so print it with %u. */
    printf("%s recv:%u\n", __func__, data_len);
}

static void my_rpd_thread(void *arg)
{
    struct my_rpd_t send_ch;
    struct my_rpd_cfg_t send_cfg = {
        .dir = 3,
        .type = RPD_CTL_TYPE,
        .name = RPD_CTL_NAME,
        .buf_len = RPD_CTL_SIZE,
    };
    struct my_rpd_t recv_ch;
    struct my_rpd_cfg_t recv_cfg = {
        .dir = 3,
        .type = RPD_DATA_TYPE,
        .name = RPD_DATA_NAME,
        .cb = my_rpd_rv_recv_cb,
        .priv = NULL,
    };
    g_run = 1;

    printf("%s start\n", __func__);
    if (rpd_send_ch_init(&send_ch, &send_cfg)) {
        printf("%s:%u error!\n", __func__, __LINE__);
        // TODO
    }

    if (rpd_recv_ch_init(&recv_ch, &recv_cfg)) {
        printf("%s:%u error!\n", __func__, __LINE__);
        // TODO
    }

    while(1) {
        int run = g_run;
        rpd_send(&send_ch, &run);
        if (!run)
            break;
        vTaskDelay(500 / portTICK_PERIOD_MS);
    }

    rpd_ch_deinit(&recv_ch);
    rpd_ch_deinit(&send_ch);
    printf("%s exit\n", __func__);

    vTaskDelete(NULL);
}

The RV side and the DSP side each create a processing thread;

DSP sends data to RV regularly, and RV sends running flags to DSP regularly;

RV modifies the value of g_run to 0, RV sends the value of g_run to DSP and then exits. DSP will also exit after receiving g_run.

DUMP data to PC

There is no file system on the DSP side, so the data needs to be sent to the RV core through inter-core communication, and then saved or sent by the RV core in some way;

On the RV core, you can use adb forward to transfer data to the PC in real time, or you can save the data to flash and then use adb pull to the PC;

All code in this section runs on the RV core;

Existing packaging interfaces can be used:

// Specify only one of file_path and port; pass NULL or 0 for the other.
// Giving file_path saves the data to flash; giving port makes the data available for transfer to the PC over adb.
void *data_save_create(const char *name, const char *file_path, int port);
void data_save_destroy(void *_hdl);
int data_save_request(void *_hdl, void *data, int size, int timeout_ms);
// Call when subsequent data should be saved to a different file; used to split the audio data.
int data_save_flush(void *_hdl, int timeout_ms);

You can copy lichee/rtos-components/aw/asr_demo/inc/data_save.h and lichee/rtos-components/aw/asr_demo/src/data_save.c to your own components, or use them directly when CONFIG_COMPONENTS_ASR_DEMO is selected;

Save to flash through standard file operation interface or save to flash by specifying file path in data_save component
Follow these steps to transfer data to PC via adb forward:

① Device side:

reboot (restart the device)
(wait for the device to finish rebooting)
adb shell af -p [port specified in the code] -r

② PC:

adb forward tcp:11112 tcp:[port specified in the code]
adb_record.py (or adb_record_3.py; both files are in lichee/rtos-components/aw/asr_demo/tools/, and the received data is saved in the directory where the script is run)

③ The device starts to call the data_save interface to create a channel and send data (refer to the first section to add start and stop transmission commands)

lugl4313820

Does this need to be uploaded to the PC to do the algorithm? Is the reasoning done locally?

Deploy HiFi5 voice algorithm on Allwinner R128 [Copy link]

Create a DSP algorithm component directory

Writing basic code

Recording on DSP

Using Algorithm Acceleration Library

Inter-core communication

DUMP data to PC

Latest reply