摘要:

unimrcp 的 VAD(voice activity detector)模块的实现一直被认为比较粗暴,而且 unimrcp 的社区也很久没有更新了。使用原始 unimrcp 如果只是用来做 Demo 演示,通过手动调整参数,还是可以的。但是距离生产环境,还有很远的一段路。

这篇文章介绍如何使用 WebRTC 的 VAD 模块替换原来的算法。

【题外话:昨天开了题目,因为有事,没有更新,今天补上】

unimrcp 的vad的模块,在libs/mpf/src/mpf_activity_detector.c 文件中,主要算法函数如下:

 /**
  * Compute the activity level of one frame as the mean absolute amplitude
  * of its samples.
  *
  * The frame buffer is read as an array of signed 16-bit samples. The
  * result is meant to be compared against a level threshold by the caller.
  *
  * @param frame  frame whose codec_frame.buffer / codec_frame.size are read
  * @return mean of |sample| over the frame, or 0 for a frame with no samples
  */
static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
{
    apr_size_t sum = 0;
    /* size is halved to get the number of 16-bit (2-byte) samples */
    apr_size_t count = frame->codec_frame.size / 2;
    const apr_int16_t *cur = frame->codec_frame.buffer;
    const apr_int16_t *end = cur + count;

    /* an empty frame has no energy; this also avoids dividing by zero below */
    if(count == 0) {
        return 0;
    }

    /* accumulate the absolute value of every sample */
    for(; cur < end; cur++) {
        if(*cur < 0) {
            sum -= *cur;
        }
        else {
            sum += *cur;
        }
    }

    return sum / count;
}

大家看这个算法,非常简单粗暴:对采样值的绝对值累加后求平均,如果大于阈值,表示有声音,否则表示静音。它并没有任何噪音检测,所以在实际环境中基本上不可用。

在上一篇文档介绍了WebRTC 的 VAD的算法,今天主要使用webRTC 的VAD的算法,替换该算法。步骤和上一篇介绍webRTC的是一致的。

 /**
  * Classify one audio frame as voice or silence using the WebRTC VAD.
  *
  * The frame buffer is read as signed 16-bit samples and cut into 10 ms
  * sub-frames; each sub-frame is passed to WebRtcVad_Process(). The frame
  * counts as voice when at least one tenth of its sub-frames are voiced.
  *
  * The return type is unsigned, so failures are reported as 0 (silence)
  * rather than -1: a -1 would wrap to a huge value and be taken for voice
  * by a caller that compares the result against a threshold.
  *
  * NOTE(review): a fresh VAD instance is created for every frame, so
  * nothing is carried over between frames; keeping one instance per
  * detector would need a change outside this function.
  *
  * @param frame  frame whose codec_frame.buffer / codec_frame.size are read
  * @return 1 when voice is detected, 0 for silence or on any VAD failure
  */
static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
{
    /* number of 16-bit (2-byte) samples in the frame */
    const size_t samplesCount = frame->codec_frame.size / 2;
    /* sub-frame length in milliseconds (default 10) */
    const int per_ms_frames = 10;
    /* NOTE(review): sample rate is hard-coded; confirm it matches the stream */
    const int sampleRate = 8000;
    /* VAD aggressiveness mode (default 1) */
    const int16_t vad_mode = 1;
    /* samples per sub-frame; non-zero for the constants above */
    const size_t samples = (size_t)sampleRate * per_ms_frames / 1000;
    /* number of complete sub-frames available in this frame */
    const size_t nTotal = samplesCount / samples;
    const int16_t *input = frame->codec_frame.buffer;
    apr_size_t result = 0;
    size_t cnt = 0;
    size_t i;
    VadInst *vadInst;

    /* not even one complete sub-frame: nothing to classify */
    if (nTotal == 0) {
        return 0;
    }

    vadInst = WebRtcVad_Create();
    if (vadInst == NULL) {
        return 0;
    }
    if (WebRtcVad_Init(vadInst) != 0) {
        goto cleanup;
    }
    if (WebRtcVad_set_mode(vadInst, vad_mode) != 0) {
        goto cleanup;
    }

    for (i = 0; i < nTotal; i++) {
        /* passed through unchanged; its meaning is defined by the VAD port in use */
        int keep_weight = 0;
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
        if (nVadRet == -1) {
            /* processing failed: report silence */
            goto cleanup;
        }
        if (nVadRet >= 1) {
            cnt++;
        }
        input += samples;
    }

    /*
     * Voice when at least 1/10 of the sub-frames are voiced. Written as a
     * multiplication because nTotal/10 truncates to 0 for short frames,
     * which made the old test "cnt < 0" and reported every frame as voice.
     */
    if (cnt * 10 >= nTotal) {
        result = 1;
    }

cleanup:
    /* single exit point so the instance is released on every path */
    WebRtcVad_Free(vadInst);
    return result;
}

下面要更新主处理函数,保留它原有的 TRANSITION 中间状态逻辑:

 /**
  * Process current frame
  *
  * Runs one frame through the detector state machine
  * (INACTIVITY -> ACTIVITY_TRANSITION -> ACTIVITY -> INACTIVITY_TRANSITION)
  * and reports the event, if any, that this frame triggers.
  *
  * @param detector  detector whose state, duration and timeouts are used
  * @param frame     frame to analyse; non-audio frames count as silence
  * @return MPF_DETECTOR_EVENT_NOINPUT, _ACTIVITY or _INACTIVITY when the
  *         corresponding timeout is reached, otherwise MPF_DETECTOR_EVENT_NONE
  */
MPF_DECLARE(mpf_detector_event_e) mpf_activity_detector_process(mpf_activity_detector_t *detector, const mpf_frame_t *frame)
{
    mpf_detector_event_e det_event = MPF_DETECTOR_EVENT_NONE;
    apr_size_t level = 0;
    int voiced;

    if((frame->type & MEDIA_FRAME_TYPE_AUDIO) == MEDIA_FRAME_TYPE_AUDIO) {
        /* first, calculate current activity level of processed frame */
        level = mpf_activity_detector_level_calculate(frame);
#if 0
        apt_log(APT_LOG_MARK,APT_PRIO_INFO,"Activity Detector --------------------- [%"APR_SIZE_T_FMT"]",level);
#endif
    }

    /*
     * The level is expected to be a 0/1 voice verdict from the VAD-based
     * calculation, so it is compared against 1 and the configurable
     * detector->level_threshold is no longer consulted.
     */
    voiced = (level >= 1);

    if(detector->state == DETECTOR_STATE_INACTIVITY) {
        if(voiced) {
            /* start to detect activity */
            mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY_TRANSITION);
        }
        else {
            detector->duration += CODEC_FRAME_TIME_BASE;
            if(detector->duration >= detector->noinput_timeout) {
                /* detected noinput */
                det_event = MPF_DETECTOR_EVENT_NOINPUT;
            }
        }
    }
    else if(detector->state == DETECTOR_STATE_ACTIVITY_TRANSITION) {
        if(voiced) {
            detector->duration += CODEC_FRAME_TIME_BASE;
            if(detector->duration >= detector->speech_timeout) {
                /* finally detected activity */
                det_event = MPF_DETECTOR_EVENT_ACTIVITY;
                mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
            }
        }
        else {
            /* fallback to inactivity */
            mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
        }
    }
    else if(detector->state == DETECTOR_STATE_ACTIVITY) {
        if(voiced) {
            detector->duration += CODEC_FRAME_TIME_BASE;
        }
        else {
            /* start to detect inactivity */
            mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY_TRANSITION);
        }
    }
    else if(detector->state == DETECTOR_STATE_INACTIVITY_TRANSITION) {
        if(voiced) {
            /* fallback to activity */
            mpf_activity_detector_state_change(detector,DETECTOR_STATE_ACTIVITY);
        }
        else {
            detector->duration += CODEC_FRAME_TIME_BASE;
            if(detector->duration >= detector->silence_timeout) {
                /* detected inactivity */
                det_event = MPF_DETECTOR_EVENT_INACTIVITY;
                mpf_activity_detector_state_change(detector,DETECTOR_STATE_INACTIVITY);
            }
        }
    }

    return det_event;
}

如此替换后,就完成了算法的更新。当然还需要调整一下cmake的相关的文件配置,加载相应的webRTC的vad文件。

/**
 * Classify one audio frame as voice or silence using the WebRTC VAD.
 *
 * The frame buffer is read as signed 16-bit samples and cut into 10 ms
 * sub-frames; each sub-frame is passed to WebRtcVad_Process(). The frame
 * counts as voice when at least one tenth of its sub-frames are voiced.
 *
 * The return type is unsigned, so failures are reported as 0 (silence)
 * rather than -1: a -1 would wrap to a huge value and be taken for voice
 * by a caller that compares the result against a threshold.
 *
 * NOTE(review): a fresh VAD instance is created for every frame, so
 * nothing is carried over between frames; keeping one instance per
 * detector would need a change outside this function.
 *
 * @param frame  frame whose codec_frame.buffer / codec_frame.size are read
 * @return 1 when voice is detected, 0 for silence or on any VAD failure
 */
static apr_size_t mpf_activity_detector_level_calculate(const mpf_frame_t *frame)
{
    /* number of 16-bit (2-byte) samples in the frame */
    const size_t samplesCount = frame->codec_frame.size / 2;
    /* sub-frame length in milliseconds (default 10) */
    const int per_ms_frames = 10;
    /* NOTE(review): sample rate is hard-coded; confirm it matches the stream */
    const int sampleRate = 8000;
    /* VAD aggressiveness mode (default 1) */
    const int16_t vad_mode = 1;
    /* samples per sub-frame; non-zero for the constants above */
    const size_t samples = (size_t)sampleRate * per_ms_frames / 1000;
    /* number of complete sub-frames available in this frame */
    const size_t nTotal = samplesCount / samples;
    const int16_t *input = frame->codec_frame.buffer;
    apr_size_t result = 0;
    size_t cnt = 0;
    size_t i;
    VadInst *vadInst;

    /* not even one complete sub-frame: nothing to classify */
    if (nTotal == 0) {
        return 0;
    }

    vadInst = WebRtcVad_Create();
    if (vadInst == NULL) {
        return 0;
    }
    if (WebRtcVad_Init(vadInst) != 0) {
        goto cleanup;
    }
    if (WebRtcVad_set_mode(vadInst, vad_mode) != 0) {
        goto cleanup;
    }

    for (i = 0; i < nTotal; i++) {
        /* passed through unchanged; its meaning is defined by the VAD port in use */
        int keep_weight = 0;
        int nVadRet = WebRtcVad_Process(vadInst, sampleRate, input, samples, keep_weight);
        if (nVadRet == -1) {
            /* processing failed: report silence */
            goto cleanup;
        }
        if (nVadRet >= 1) {
            cnt++;
        }
        input += samples;
    }

    /*
     * Voice when at least 1/10 of the sub-frames are voiced. Written as a
     * multiplication because nTotal/10 truncates to 0 for short frames,
     * which made the old test "cnt < 0" and reported every frame as voice.
     */
    if (cnt * 10 >= nTotal) {
        result = 1;
    }

cleanup:
    /* single exit point so the instance is released on every path */
    WebRtcVad_Free(vadInst);
    return result;
}

最新文章

  1. 安装SVN客户端重启电脑之后,右键未出现SVN选项的原因
  2. HTML5 Canvas 高仿逼真 3D 布料图案效果
  3. Android成长日记-使用GridView显示多行数据
  4. activiti 源码笔记之startProcess
  5. swun 1184
  6. 56. Merge Intervals
  7. 听同事讲 Bayesian statistics: Part 2 - Bayesian inference
  8. UIView属性clipsTobounds的应用
  9. React Native &amp; Android &amp; iOS &amp; APK
  10. c# webapi上传、读取、删除图片
  11. BUAAOO第一单元的总结
  12. Wavelet Ridgelet Curvelet Contourlet Ripplet
  13. 字符串转 多行 ,判断给定一组id ,查库中不存在用
  14. C# IIS 服务器上传图片500解决办法
  15. 深入贯彻闭包思想,全面理解JS闭包形成过程
  16. 如何把EntityList转换成DataSet
  17. python之旅:并发编程之多进程理论部分
  18. 规则引擎以及blaze 规则库的集成初探之二——JSR94 的规则引擎API和实现
  19. C++之贪吃蛇
  20. (十一)__LINE__、__FUNCTION__的使用

热门文章

  1. Spring事务专题(三)事务的基本概念,Mysql事务处理原理
  2. RabbitMQ学习总结(1)-基础概念
  3. MySQL面试题!新鲜出炉~
  4. 大学生可用来接单,利用Python实现教务系统扩容抢课!
  5. Java 集合框架综述,这篇让你吃透!
  6. Python 为什么能支持任意的真值判断?
  7. Elasticsearch第一篇:在 Windows 上的环境搭建
  8. C#算法设计排序篇之04-选择排序(附带动画演示程序)
  9. C#LeetCode刷题之#26-删除排序数组中的重复项(Remove Duplicates from Sorted Array)
  10. Python多进程实现并行化随机森林