From f85ce99894f16c1e10cd29321feeab8376adcf77 Mon Sep 17 00:00:00 2001 From: Grishka Date: Fri, 30 Nov 2018 15:39:31 +0300 Subject: [PATCH] Save more data in data saving mode --- EchoCanceller.cpp | 19 ++++++++++-- EchoCanceller.h | 4 ++- OpusEncoder.cpp | 48 +++++++++++++++++++++++++++---- OpusEncoder.h | 3 ++ VoIPController.cpp | 5 +++- os/darwin/AudioInputAudioUnit.cpp | 3 +- 6 files changed, 69 insertions(+), 13 deletions(-) diff --git a/EchoCanceller.cpp b/EchoCanceller.cpp index 05ff25f..6e866bd 100755 --- a/EchoCanceller.cpp +++ b/EchoCanceller.cpp @@ -49,6 +49,7 @@ EchoCanceller::EchoCanceller(bool enableAEC, bool enableNS, bool enableAGC){ apm->gain_control()->set_mode(webrtc::GainControl::Mode::kAdaptiveDigital); apm->gain_control()->set_target_level_dbfs(9); } + apm->voice_detection()->set_likelihood(webrtc::VoiceDetection::Likelihood::kVeryLowLikelihood); audioFrame=new webrtc::AudioFrame(); audioFrame->samples_per_channel_=480; @@ -119,7 +120,7 @@ void EchoCanceller::Enable(bool enabled){ isOn=enabled; } -void EchoCanceller::ProcessInput(int16_t* inOut, size_t numSamples){ +void EchoCanceller::ProcessInput(int16_t* inOut, size_t numSamples, bool& hasVoice){ if(!isOn || (!enableAEC && !enableAGC && !enableNS)){ return; } @@ -127,12 +128,19 @@ void EchoCanceller::ProcessInput(int16_t* inOut, size_t numSamples){ assert(numSamples==960); memcpy(audioFrame->mutable_data(), inOut, 480*2); - apm->set_stream_delay_ms(delay); + if(enableAEC) + apm->set_stream_delay_ms(delay); apm->ProcessStream(audioFrame); + if(enableVAD) + hasVoice=apm->voice_detection()->stream_has_voice(); memcpy(inOut, audioFrame->data(), 480*2); memcpy(audioFrame->mutable_data(), inOut+480, 480*2); - apm->set_stream_delay_ms(delay); + if(enableAEC) + apm->set_stream_delay_ms(delay); apm->ProcessStream(audioFrame); + if(enableVAD){ + hasVoice=hasVoice || apm->voice_detection()->stream_has_voice(); + } memcpy(inOut+480, audioFrame->data(), 480*2); } @@ -149,6 +157,11 @@ void EchoCanceller::SetAECStrength(int strength){ #endif } +void EchoCanceller::SetVoiceDetectionEnabled(bool enabled){ + enableVAD=enabled; + apm->voice_detection()->Enable(enabled); +} + AudioEffect::~AudioEffect(){ } diff --git a/EchoCanceller.h b/EchoCanceller.h index 62748e8..3dff7cc 100755 --- a/EchoCanceller.h +++ b/EchoCanceller.h @@ -29,13 +29,15 @@ public: virtual void Stop(); void SpeakerOutCallback(unsigned char* data, size_t len); void Enable(bool enabled); - void ProcessInput(int16_t* inOut, size_t numSamples); + void ProcessInput(int16_t* inOut, size_t numSamples, bool& hasVoice); void SetAECStrength(int strength); + void SetVoiceDetectionEnabled(bool enabled); private: bool enableAEC; bool enableAGC; bool enableNS; + bool enableVAD=false; bool isOn; #ifndef TGVOIP_NO_DSP webrtc::AudioProcessing* apm=NULL; diff --git a/OpusEncoder.cpp b/OpusEncoder.cpp index 4fc485f..70dbb40 100755 --- a/OpusEncoder.cpp +++ b/OpusEncoder.cpp @@ -23,7 +23,7 @@ tgvoip::OpusEncoder::OpusEncoder(MediaStreamItf *source, bool needSecondary):que opus_encoder_ctl(enc, OPUS_SET_INBAND_FEC(1)); opus_encoder_ctl(enc, OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE)); opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); - requestedBitrate=32000; + requestedBitrate=20000; currentBitrate=0; running=false; echoCanceller=NULL; @@ -32,8 +32,9 @@ tgvoip::OpusEncoder::OpusEncoder(MediaStreamItf *source, bool needSecondary):que levelMeter=NULL; mediumCorrectionBitrate=static_cast(ServerConfig::GetSharedInstance()->GetInt("audio_medium_fec_bitrate", 10000)); strongCorrectionBitrate=static_cast(ServerConfig::GetSharedInstance()->GetInt("audio_strong_fec_bitrate", 8000)); - mediumCorrectionMultiplier=ServerConfig::GetSharedInstance()->GetDouble("audio_medium_fec_multiplier", 1.5); - strongCorrectionMultiplier=ServerConfig::GetSharedInstance()->GetDouble("audio_strong_fec_multiplier", 2.0); + mediumCorrectionMultiplier=ServerConfig::GetSharedInstance()->GetDouble("audio_medium_fec_multiplier", 0.8); + strongCorrectionMultiplier=ServerConfig::GetSharedInstance()->GetDouble("audio_strong_fec_multiplier", 0.5); + vadNoVoiceBitrate=static_cast(ServerConfig::GetSharedInstance()->GetInt("audio_vad_no_voice_bitrate", 6000)); secondaryEncoderEnabled=false; if(needSecondary){ @@ -44,7 +45,7 @@ tgvoip::OpusEncoder::OpusEncoder(MediaStreamItf *source, bool needSecondary):que opus_encoder_ctl(secondaryEncoder, OPUS_SET_BITRATE(8000)); opus_encoder_ctl(secondaryEncoder, OPUS_SET_INBAND_FEC(1)); opus_encoder_ctl(secondaryEncoder, OPUS_SET_PACKET_LOSS_PERC(15)); - opus_encoder_ctl(secondaryEncoder, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + opus_encoder_ctl(secondaryEncoder, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND)); }else{ secondaryEncoder=NULL; } @@ -140,19 +141,50 @@ void tgvoip::OpusEncoder::RunThread(){ frame=(int16_t*) malloc(960*2*packetsPerFrame); else frame=NULL; + bool frameHasVoice=false; + bool wasVadMode=false; while(running){ int16_t* packet=(int16_t*)queue.GetBlocking(); if(packet){ + bool hasVoice=true; if(echoCanceller) - echoCanceller->ProcessInput(packet, 960); + echoCanceller->ProcessInput(packet, 960, hasVoice); if(packetsPerFrame==1){ Encode(packet, 960); }else{ memcpy(frame+(960*bufferedCount), packet, 960*2); + frameHasVoice=frameHasVoice || hasVoice; bufferedCount++; if(bufferedCount==packetsPerFrame){ + if(vadMode){ + if(frameHasVoice){ + opus_encoder_ctl(enc, OPUS_SET_BITRATE(currentBitrate)); + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND)); + if(secondaryEncoder){ + opus_encoder_ctl(secondaryEncoder, OPUS_SET_BITRATE(currentBitrate)); + opus_encoder_ctl(secondaryEncoder, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND)); + } + }else{ + opus_encoder_ctl(enc, OPUS_SET_BITRATE(vadNoVoiceBitrate)); + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); + if(secondaryEncoder){ + opus_encoder_ctl(secondaryEncoder, OPUS_SET_BITRATE(vadNoVoiceBitrate)); + opus_encoder_ctl(secondaryEncoder, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_NARROWBAND)); + } + } + wasVadMode=true; + }else if(wasVadMode){ + wasVadMode=false; + opus_encoder_ctl(enc, OPUS_SET_BITRATE(currentBitrate)); + opus_encoder_ctl(enc, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_FULLBAND)); + if(secondaryEncoder){ + opus_encoder_ctl(secondaryEncoder, OPUS_SET_BITRATE(currentBitrate)); + opus_encoder_ctl(secondaryEncoder, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH_SUPERWIDEBAND)); + } + } Encode(frame, 960*packetsPerFrame); bufferedCount=0; + frameHasVoice=false; } } bufferPool.Reuse(reinterpret_cast(packet)); @@ -169,7 +201,7 @@ void tgvoip::OpusEncoder::SetOutputFrameDuration(uint32_t duration){ void tgvoip::OpusEncoder::SetPacketLoss(int percent){ - packetLossPercent=percent; + packetLossPercent=std::min(20, percent); double multiplier=1; if(currentBitrate<=strongCorrectionBitrate) multiplier=strongCorrectionMultiplier; @@ -202,3 +234,7 @@ void tgvoip::OpusEncoder::InvokeCallback(unsigned char *data, size_t length, uns void tgvoip::OpusEncoder::SetSecondaryEncoderEnabled(bool enabled){ secondaryEncoderEnabled=enabled; } + +void tgvoip::OpusEncoder::SetVadMode(bool vad){ + vadMode=vad; +} diff --git a/OpusEncoder.h b/OpusEncoder.h index 26f612f..5a1fe83 100755 --- a/OpusEncoder.h +++ b/OpusEncoder.h @@ -37,6 +37,7 @@ public: void SetLevelMeter(AudioLevelMeter* levelMeter); void SetCallback(void (*f)(unsigned char*, size_t, unsigned char*, size_t, void*), void* param); void SetSecondaryEncoderEnabled(bool enabled); + void SetVadMode(bool vad); private: static size_t Callback(unsigned char* data, size_t len, void* param); @@ -63,6 +64,8 @@ private: double strongCorrectionMultiplier; AudioLevelMeter* levelMeter; bool secondaryEncoderEnabled; + bool vadMode=false; + uint32_t vadNoVoiceBitrate; void (*callback)(unsigned char*, size_t, unsigned char*, size_t, void*); void* callbackParam; diff --git a/VoIPController.cpp b/VoIPController.cpp index a97e7d0..9cd11ee 100755 --- a/VoIPController.cpp +++ b/VoIPController.cpp @@ -1159,6 +1159,9 @@ void VoIPController::UpdateAudioBitrateLimit(){ maxBitrate=maxAudioBitrate; encoder->SetBitrate(initAudioBitrate); } + encoder->SetVadMode(dataSavingMode || dataSavingRequestedByPeer); + if(echoCanceller) + echoCanceller->SetVoiceDetectionEnabled(dataSavingMode || dataSavingRequestedByPeer); } } @@ -1978,7 +1981,7 @@ simpleAudioBlock random_id:long random_bytes:string raw_data:string = DecryptedA DebugLoggedPacket dpkt={ static_cast(pseq), GetCurrentTime()-connectionInitTime, - static_cast(packetInnerLen) + static_cast(packet.length) }; debugLoggedPackets.push_back(dpkt); if(debugLoggedPackets.size()>=2500){ diff --git a/os/darwin/AudioInputAudioUnit.cpp b/os/darwin/AudioInputAudioUnit.cpp index 35b8522..3f80426 100644 --- a/os/darwin/AudioInputAudioUnit.cpp +++ b/os/darwin/AudioInputAudioUnit.cpp @@ -41,14 +41,13 @@ void AudioInputAudioUnit::Stop(){ void AudioInputAudioUnit::HandleBufferCallback(AudioBufferList *ioData){ int i; - int j; for(i=0;imNumberBuffers;i++){ AudioBuffer buf=ioData->mBuffers[i]; #if TARGET_OS_OSX assert(remainingDataSize+buf.mDataByteSize/2<10240); float* src=reinterpret_cast(buf.mData); int16_t* dst=reinterpret_cast(remainingData+remainingDataSize); - for(j=0;j