// // libtgvoip is free and unencumbered public domain software. // For more information, see http://unlicense.org or the UNLICENSE file // you should have received with this source code distribution. // #include "OpusDecoder.h" #include "audio/Resampler.h" #include "logging.h" #include #include #include #ifdef HAVE_CONFIG_H #include #else #include "opus.h" #endif #include "VoIPController.h" #define PACKET_SIZE (960*2) using namespace tgvoip; tgvoip::OpusDecoder::OpusDecoder(const std::shared_ptr& dst, bool isAsync, bool needEC){ dst->SetCallback(OpusDecoder::Callback, this); Initialize(isAsync, needEC); } tgvoip::OpusDecoder::OpusDecoder(const std::unique_ptr& dst, bool isAsync, bool needEC){ dst->SetCallback(OpusDecoder::Callback, this); Initialize(isAsync, needEC); } tgvoip::OpusDecoder::OpusDecoder(MediaStreamItf* dst, bool isAsync, bool needEC){ dst->SetCallback(OpusDecoder::Callback, this); Initialize(isAsync, needEC); } void tgvoip::OpusDecoder::Initialize(bool isAsync, bool needEC){ async=isAsync; if(async){ decodedQueue=new BlockingQueue(33); semaphore=new Semaphore(32, 0); }else{ decodedQueue=NULL; semaphore=NULL; } dec=opus_decoder_create(48000, 1, NULL); if(needEC) ecDec=opus_decoder_create(48000, 1, NULL); else ecDec=NULL; buffer=(unsigned char *) malloc(8192); lastDecoded=NULL; outputBufferSize=0; echoCanceller=NULL; frameDuration=20; consecutiveLostPackets=0; enableDTX=false; silentPacketCount=0; levelMeter=NULL; nextLen=0; running=false; remainingDataLen=0; processedBuffer=NULL; prevWasEC=false; prevLastSample=0; } tgvoip::OpusDecoder::~OpusDecoder(){ opus_decoder_destroy(dec); if(ecDec) opus_decoder_destroy(ecDec); free(buffer); if(decodedQueue) delete decodedQueue; if(semaphore) delete semaphore; } void tgvoip::OpusDecoder::SetEchoCanceller(EchoCanceller* canceller){ echoCanceller=canceller; } size_t tgvoip::OpusDecoder::Callback(unsigned char *data, size_t len, void *param){ return ((OpusDecoder*)param)->HandleCallback(data, len); } size_t tgvoip::OpusDecoder::HandleCallback(unsigned char *data, size_t len){ if(async){ if(!running){ memset(data, 0, len); return 0; } if(outputBufferSize==0){ outputBufferSize=len; int packetsNeeded; if(len>PACKET_SIZE) packetsNeeded=len/PACKET_SIZE; else packetsNeeded=1; packetsNeeded*=2; semaphore->Release(packetsNeeded); } assert(outputBufferSize==len && "output buffer size is supposed to be the same throughout callbacks"); if(len==PACKET_SIZE){ Buffer lastDecoded=decodedQueue->GetBlocking(); if(lastDecoded.IsEmpty()) return 0; memcpy(data, *lastDecoded, PACKET_SIZE); semaphore->Release(); if(silentPacketCount>0){ silentPacketCount--; if(levelMeter) levelMeter->Update(reinterpret_cast(data), 0); return 0; } if(echoCanceller){ echoCanceller->SpeakerOutCallback(data, PACKET_SIZE); } }else{ LOGE("Opus decoder buffer length != 960 samples"); abort(); } }else{ if(remainingDataLen==0 && silentPacketCount==0){ int duration=DecodeNextFrame(); remainingDataLen=(size_t) (duration/20*960*2); } if(silentPacketCount>0 || remainingDataLen==0 || !processedBuffer){ if(silentPacketCount>0) silentPacketCount--; memset(data, 0, 960*2); if(levelMeter) levelMeter->Update(reinterpret_cast(data), 0); return 0; } memcpy(data, processedBuffer, 960*2); remainingDataLen-=960*2; if(remainingDataLen>0){ memmove(processedBuffer, processedBuffer+960*2, remainingDataLen); } } if(levelMeter) levelMeter->Update(reinterpret_cast(data), len/2); return len; } void tgvoip::OpusDecoder::Start(){ if(!async) return; running=true; thread=new Thread(std::bind(&tgvoip::OpusDecoder::RunThread, this)); thread->SetName("opus_decoder"); thread->SetMaxPriority(); thread->Start(); } void tgvoip::OpusDecoder::Stop(){ if(!running || !async) return; running=false; semaphore->Release(); thread->Join(); delete thread; } void tgvoip::OpusDecoder::RunThread(){ int i; LOGI("decoder: packets per frame %d", packetsPerFrame); while(running){ int playbackDuration=DecodeNextFrame(); for(i=0;iAcquire(); if(!running){ LOGI("==== decoder exiting ===="); return; } try{ Buffer buf=bufferPool.Get(); if(remainingDataLen>0){ for(effects::AudioEffect*& effect:postProcEffects){ effect->Process(reinterpret_cast(processedBuffer+(PACKET_SIZE*i)), 960); } buf.CopyFrom(processedBuffer+(PACKET_SIZE*i), 0, PACKET_SIZE); }else{ //LOGE("Error decoding, result=%d", size); memset(*buf, 0, PACKET_SIZE); } decodedQueue->Put(std::move(buf)); }catch(std::bad_alloc& x){ LOGW("decoder: no buffers left!"); } } } } int tgvoip::OpusDecoder::DecodeNextFrame(){ int playbackDuration=0; bool isEC=false; size_t len=jitterBuffer->HandleOutput(buffer, 8192, 0, true, playbackDuration, isEC); bool fec=false; if(!len){ fec=true; len=jitterBuffer->HandleOutput(buffer, 8192, 0, false, playbackDuration, isEC); //if(len) // LOGV("Trying FEC..."); } int size; if(len){ size=opus_decode(isEC ? ecDec : dec, buffer, len, (opus_int16 *) decodeBuffer, packetsPerFrame*960, fec ? 1 : 0); consecutiveLostPackets=0; if(prevWasEC!=isEC && size){ // It turns out the waveforms generated by the PLC feature are also great to help smooth out the // otherwise audible transition between the frames from different decoders. Those are basically an extrapolation // of the previous successfully decoded data -- which is exactly what we need here. size=opus_decode(prevWasEC ? ecDec : dec, NULL, 0, (opus_int16*)nextBuffer, packetsPerFrame*960, 0); if(size){ int16_t* plcSamples=reinterpret_cast(nextBuffer); int16_t* samples=reinterpret_cast(decodeBuffer); constexpr float coeffs[]={0.999802, 0.995062, 0.984031, 0.966778, 0.943413, 0.914084, 0.878975, 0.838309, 0.792344, 0.741368, 0.685706, 0.625708, 0.561754, 0.494249, 0.423619, 0.350311, 0.274788, 0.197527, 0.119018, 0.039757}; for(int i=0;i<20;i++){ samples[i]=(int16_t)round((plcSamples[i]*coeffs[i]+(float)samples[i]*(1.0-coeffs[i]))); } } } prevWasEC=isEC; prevLastSample=decodeBuffer[size-1]; }else{ // do packet loss concealment consecutiveLostPackets++; if(consecutiveLostPackets>2 && enableDTX){ silentPacketCount+=packetsPerFrame; size=packetsPerFrame*960; }else{ size=opus_decode(prevWasEC ? ecDec : dec, NULL, 0, (opus_int16 *) decodeBuffer, packetsPerFrame*960, 0); //LOGV("PLC"); } } if(size<0) LOGW("decoder: opus_decode error %d", size); remainingDataLen=size; if(playbackDuration==80){ processedBuffer=buffer; audio::Resampler::Rescale60To80((int16_t*) decodeBuffer, (int16_t*) processedBuffer); }else if(playbackDuration==40){ processedBuffer=buffer; audio::Resampler::Rescale60To40((int16_t*) decodeBuffer, (int16_t*) processedBuffer); }else{ processedBuffer=decodeBuffer; } return playbackDuration; } void tgvoip::OpusDecoder::SetFrameDuration(uint32_t duration){ frameDuration=duration; packetsPerFrame=frameDuration/20; } void tgvoip::OpusDecoder::SetJitterBuffer(std::shared_ptr jitterBuffer){ this->jitterBuffer=jitterBuffer; } void tgvoip::OpusDecoder::SetDTX(bool enable){ enableDTX=enable; } void tgvoip::OpusDecoder::SetLevelMeter(AudioLevelMeter *levelMeter){ this->levelMeter=levelMeter; } void tgvoip::OpusDecoder::AddAudioEffect(effects::AudioEffect *effect){ postProcEffects.push_back(effect); } void tgvoip::OpusDecoder::RemoveAudioEffect(effects::AudioEffect *effect){ std::vector::iterator i=std::find(postProcEffects.begin(), postProcEffects.end(), effect); if(i!=postProcEffects.end()) postProcEffects.erase(i); }