替换unimrcp的VAD模块

摘要：

unimrcp 的 VAD（voice activity detector，语音活动检测）模块的算法一直被认为比较粗暴，而且 unimrcp 的社区也很久没有更新了。如果只是用原始的 unimrcp 做 Demo 演示，通过手动调整参数还是可以的；但是距离生产环境，还有很远的一段路。

这篇文章介绍如何使用 WebRTC 的 VAD 模块替换原来的算法。

【题外话：昨天开了题目，因为有事，没有更新，今天补上】

unimrcp 的 VAD 模块位于 libs/mpf/src/mpf_activity_detector.c 文件中，主要算法函数如下：

 /**
  * Calculate the activity level of a frame as the mean absolute amplitude
  * of its samples.
  *
  * The codec frame buffer is read as an array of apr_int16_t; the byte size
  * is halved to obtain the number of samples.
  *
  * NOTE: the result is sum / count, so a frame whose codec_frame.size is
  * smaller than 2 bytes would divide by zero.
  *
  * @param frame  frame whose codec_frame.buffer and codec_frame.size are read
  * @return sum of absolute sample values divided by the sample count
  */
 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
 {
     apr_size_t sum = 0;
     /* two bytes per 16-bit sample */
     apr_size_t count = frame->codec_frame.size/2;
     const apr_int16_t *cur = frame->codec_frame.buffer;
     const apr_int16_t *end = cur + count;

     for(; cur < end; cur++) {
         /* accumulate |*cur| */
         if(*cur < 0) {
             sum -= *cur;
         }
         else {
             sum += *cur;
         }
     }

     return sum / count;
 }

大家看这个算法，非常简单粗暴：把每个采样的绝对值累加后求平均值，如果不小于阈值，表示有声音；如果小于阈值，表示静音。其中并没有任何噪音检测，所以基本上不可用。

上一篇文档介绍了 WebRTC 的 VAD 算法，今天主要使用 WebRTC 的 VAD 算法替换上面的算法。步骤和上一篇介绍 WebRTC 时是一致的。

 /**
  * Classify a frame as voiced or silent using the WebRTC VAD.
  *
  * The frame buffer is read as 16-bit samples and cut into chunks of
  * per_ms_frames milliseconds. Each chunk is passed to WebRtcVad_Process();
  * the frame counts as voiced when at least one tenth of its chunks are
  * reported as voice.
  *
  * A VAD instance is created and released on every call, so no detector
  * state is carried over from one frame to the next.
  *
  * @param frame  frame whose codec_frame.buffer and codec_frame.size are read
  * @return 1 when the frame is considered voiced; 0 when it is silent, holds
  *         less than one chunk, or the VAD could not be created or run
  *         (a negative error code cannot be returned through apr_size_t
  *         without being mistaken for a very high level by the caller)
  */
 static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
 {
   /* 16-bit PCM: two bytes per sample */
   apr_size_t samplesCount = frame->codec_frame.size/2;
   /* length in milliseconds of each chunk given to the VAD (default 10) */
   int per_ms_frames = 10;
   /* NOTE(review): the sampling rate is hard-coded; 8000 Hz is assumed here,
      confirm that it matches the rate of the processed stream */
   apr_size_t sampleRate = 8000;
   /* number of samples in one chunk */
   size_t samples = sampleRate * per_ms_frames / 1000;
   size_t nTotal;
   size_t cnt = 0;
   size_t i;
   apr_size_t level = 0;
   int16_t *input = frame->codec_frame.buffer;
   /* VAD mode passed to WebRtcVad_set_mode() (default 1) */
   int16_t vad_mode = 1;
   VadInst *vadInst;

   if (samples == 0) {
     return 0;
   }

   /* number of whole chunks contained in the frame */
   nTotal = samplesCount / samples;
   if (nTotal == 0) {
     return 0;
   }

   vadInst = WebRtcVad_Create();
   if (vadInst == NULL) {
     return 0;
   }

   if (WebRtcVad_Init(vadInst) != 0) {
     goto done;
   }

   if (WebRtcVad_set_mode(vadInst, vad_mode) != 0) {
     goto done;
   }

   for (i = 0; i < nTotal; i++) {
     int keep_weight = 0;
     int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
     if (nVadRet == -1) {
       /* processing failed: report the frame as silent */
       goto done;
     }
     if (nVadRet >= 1) {
       cnt++;
     }
 #if 0
     printf(" %d \t", nVadRet);
 #endif
     input += samples;
   }

   /* Voiced when at least 10% of the chunks contain voice. Multiplying
      instead of dividing keeps the test meaningful when nTotal < 10. */
   if (cnt * 10 >= nTotal) {
     level = 1;
   }

 done:
   /* single release point for every path taken after a successful create */
   WebRtcVad_Free(vadInst);
   return level;
 }

下面要更新主处理函数，保留它原有的 TRANSITION 中间状态逻辑：

 /**
  * Process current frame.
  *
  * For audio frames the activity level is obtained from
  * mpf_activity_detector_level_calculate(); for any other frame type the
  * level stays 0. The level is compared against the constant 1 instead of
  * detector->level_threshold, and the detector moves between four states:
  *
  *   INACTIVITY            -> ACTIVITY_TRANSITION    on an active frame
  *   ACTIVITY_TRANSITION   -> ACTIVITY               once duration reaches speech_timeout
  *   ACTIVITY_TRANSITION   -> INACTIVITY             on an inactive frame
  *   ACTIVITY              -> INACTIVITY_TRANSITION  on an inactive frame
  *   INACTIVITY_TRANSITION -> ACTIVITY               on an active frame
  *   INACTIVITY_TRANSITION -> INACTIVITY             once duration reaches silence_timeout
  *
  * detector->duration is advanced by CODEC_FRAME_TIME_BASE for every frame
  * that keeps the detector in its current state.
  *
  * @param detector  detector whose state, duration and timeouts are used
  * @param frame     frame to analyse
  * @return MPF_DETECTOR_EVENT_ACTIVITY, MPF_DETECTOR_EVENT_INACTIVITY or
  *         MPF_DETECTOR_EVENT_NOINPUT when the matching timeout is reached,
  *         otherwise MPF_DETECTOR_EVENT_NONE
  */
 MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
 {
     mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
     apr_size_t level = 0;
     if((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
         /* first, calculate current activity level of processed frame */
         level = mpf_activity_detector_level_calculate(frame);
 #if 0
         apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Activity Detector --------------------- [%"APR_SIZE_T_FMT"]",level);
 #endif
     }

     if(detector->state == DETECTOR_STATE_INACTIVITY) {
         if(level >= 1) {
             /* start to detect activity */
             mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY_TRANSITION);
         }
         else {
             detector->duration += CODEC_FRAME_TIME_BASE;
             if(detector->duration >= detector->noinput_timeout) {
                 /* detected noinput */
                 det_event = MPF_DETECTOR_EVENT_NOINPUT;
             }
         }
     }
     else if(detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
         if(level >= 1) {
             detector->duration += CODEC_FRAME_TIME_BASE;
             if(detector->duration >= detector->speech_timeout) {
                 /* finally detected activity */
                 det_event = MPF_DETECTOR_EVENT_ACTIVITY;
                 mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
             }
         }
         else {
             /* fallback to inactivity */
             mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
         }
     }
     else if(detector->state == DETECTOR_STATE_ACTIVITY) {
         if(level >= 1) {
             detector->duration += CODEC_FRAME_TIME_BASE;
         }
         else {
             /* start to detect inactivity */
             mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY_TRANSITION);
         }
     }
     else if(detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
         if(level >= 1) {
             /* fallback to activity */
             mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
         }
         else {
             detector->duration += CODEC_FRAME_TIME_BASE;
             if(detector->duration >= detector->silence_timeout) {
                 /* detected inactivity */
                 det_event = MPF_DETECTOR_EVENT_INACTIVITY;
                 mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
             }
         }
     }

     return det_event;
 }

如此替换后，就完成了算法的更新。当然还需要调整一下 CMake 的相关配置文件，把 WebRTC 的 VAD 源文件加入编译。替换后的完整函数如下：

/**
 * Classify a frame as voiced or silent using the WebRTC VAD.
 *
 * The frame buffer is read as 16-bit samples and cut into chunks of
 * per_ms_frames milliseconds. Each chunk is passed to WebRtcVad_Process();
 * the frame counts as voiced when at least one tenth of its chunks are
 * reported as voice.
 *
 * A VAD instance is created and released on every call, so no detector
 * state is carried over from one frame to the next.
 *
 * @param frame  frame whose codec_frame.buffer and codec_frame.size are read
 * @return 1 when the frame is considered voiced; 0 when it is silent, holds
 *         less than one chunk, or the VAD could not be created or run
 *         (a negative error code cannot be returned through apr_size_t
 *         without being mistaken for a very high level by the caller)
 */
static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
{
  /* 16-bit PCM: two bytes per sample */
  apr_size_t samplesCount = frame->codec_frame.size/2;
  /* length in milliseconds of each chunk given to the VAD (default 10) */
  int per_ms_frames = 10;
  /* NOTE(review): the sampling rate is hard-coded; 8000 Hz is assumed here,
     confirm that it matches the rate of the processed stream */
  apr_size_t sampleRate = 8000;
  /* number of samples in one chunk */
  size_t samples = sampleRate * per_ms_frames / 1000;
  size_t nTotal;
  size_t cnt = 0;
  size_t i;
  apr_size_t level = 0;
  int16_t *input = frame->codec_frame.buffer;
  /* VAD mode passed to WebRtcVad_set_mode() (default 1) */
  int16_t vad_mode = 1;
  VadInst *vadInst;

  if (samples == 0) {
    return 0;
  }

  /* number of whole chunks contained in the frame */
  nTotal = samplesCount / samples;
  if (nTotal == 0) {
    return 0;
  }

  vadInst = WebRtcVad_Create();
  if (vadInst == NULL) {
    return 0;
  }

  if (WebRtcVad_Init(vadInst) != 0) {
    goto done;
  }

  if (WebRtcVad_set_mode(vadInst, vad_mode) != 0) {
    goto done;
  }

  for (i = 0; i < nTotal; i++) {
    int keep_weight = 0;
    int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
    if (nVadRet == -1) {
      /* processing failed: report the frame as silent */
      goto done;
    }
    if (nVadRet >= 1) {
      cnt++;
    }
#if 0
    printf(" %d \t", nVadRet);
#endif
    input += samples;
  }

  /* Voiced when at least 10% of the chunks contain voice. Multiplying
     instead of dividing keeps the test meaningful when nTotal < 10. */
  if (cnt * 10 >= nTotal) {
    level = 1;
  }

done:
  /* single release point for every path taken after a successful create */
  WebRtcVad_Free(vadInst);
  return level;
}

巴特西

替换unimrcp的VAD模块

最新文章

热门文章