// Mirror of https://github.com/danog/libtgvoip.git (synced 2025-01-22 21:11:36 +01:00)
// File: libtgvoip/OpusDecoder.cpp (C++, 281 lines, 7.8 KiB)
//
// libtgvoip is free and unencumbered public domain software.
// For more information, see http://unlicense.org or the UNLICENSE file
// you should have received with this source code distribution.
//
#include "OpusDecoder.h"
#include "audio/Resampler.h"
2017-02-02 19:24:40 +03:00
#include "logging.h"
#include <assert.h>
2018-06-05 00:28:38 +03:00
#include <math.h>
#include <algorithm>
2017-02-02 19:24:40 +03:00
2018-05-15 21:23:46 +03:00
#include "VoIPController.h"
2017-02-02 19:24:40 +03:00
#define PACKET_SIZE (960*2)
using namespace tgvoip;
2018-06-04 22:37:43 +03:00
// Constructs a decoder attached to the stream held by a shared_ptr.
// Registers the static Callback trampoline on dst, passing this object as the
// opaque callback parameter, then performs the common setup in Initialize().
//   dst     - stream on which the callback is installed; must be non-null.
//   isAsync - forwarded to Initialize().
//   needEC  - forwarded to Initialize().
tgvoip::OpusDecoder::OpusDecoder(const std::shared_ptr<MediaStreamItf>& dst, bool isAsync, bool needEC){
	MediaStreamItf& stream=*dst;
	stream.SetCallback(&tgvoip::OpusDecoder::Callback, this);
	this->Initialize(isAsync, needEC);
}
// Constructs a decoder attached to the stream held by a unique_ptr.
// The unique_ptr is only borrowed (taken by const reference); ownership of the
// stream is not transferred. Registers the static Callback trampoline on dst
// with this object as the opaque parameter, then runs Initialize().
//   dst     - stream on which the callback is installed; must be non-null.
//   isAsync - forwarded to Initialize().
//   needEC  - forwarded to Initialize().
tgvoip::OpusDecoder::OpusDecoder(const std::unique_ptr<MediaStreamItf>& dst, bool isAsync, bool needEC){
	dst->SetCallback(OpusDecoder::Callback, this);
	Initialize(isAsync, needEC);
}
void tgvoip::OpusDecoder::Initialize(bool isAsync, bool needEC){
async=isAsync;
2018-05-15 21:23:46 +03:00
if(async){
decodedQueue=new BlockingQueue<unsigned char*>(33);
bufferPool=new BufferPool(PACKET_SIZE, 32);
semaphore=new Semaphore(32, 0);
}else{
decodedQueue=NULL;
bufferPool=NULL;
semaphore=NULL;
}
2017-02-02 19:24:40 +03:00
dec=opus_decoder_create(48000, 1, NULL);
2018-06-04 22:37:43 +03:00
if(needEC)
ecDec=opus_decoder_create(48000, 1, NULL);
else
ecDec=NULL;
buffer=(unsigned char *) malloc(8192);
2017-02-02 19:24:40 +03:00
lastDecoded=NULL;
outputBufferSize=0;
echoCanceller=NULL;
frameDuration=20;
2018-05-15 21:23:46 +03:00
consecutiveLostPackets=0;
enableDTX=false;
silentPacketCount=0;
levelMeter=NULL;
nextLen=0;
running=false;
remainingDataLen=0;
processedBuffer=NULL;
2018-06-04 22:37:43 +03:00
prevWasEC=false;
prevLastSample=0;
2017-02-02 19:24:40 +03:00
}
// Releases everything allocated by Initialize().
tgvoip::OpusDecoder::~OpusDecoder(){
	// If the decoder thread is still running, stop and join it first: it uses
	// the semaphore, buffer pool, queue and decoders freed below.
	// Stop() returns immediately when the decoder is not running or not async.
	Stop();

	opus_decoder_destroy(dec);
	if(ecDec)
		opus_decoder_destroy(ecDec);
	free(buffer);

	// These are null in synchronous mode; deleting a null pointer is a no-op.
	delete bufferPool;
	delete decodedQueue;
	delete semaphore;
}
// Stores the echo canceller that HandleCallback() notifies (via
// SpeakerOutCallback) with each packet it outputs in async mode.
// The pointer is only stored, not owned; pass NULL to detach.
void tgvoip::OpusDecoder::SetEchoCanceller(EchoCanceller* canceller){
	echoCanceller=canceller;
}
size_t tgvoip::OpusDecoder::Callback(unsigned char *data, size_t len, void *param){
2018-05-15 21:23:46 +03:00
return ((OpusDecoder*)param)->HandleCallback(data, len);
2017-02-02 19:24:40 +03:00
}
2018-05-15 21:23:46 +03:00
size_t tgvoip::OpusDecoder::HandleCallback(unsigned char *data, size_t len){
if(async){
if(!running){
memset(data, 0, len);
return 0;
2017-02-02 19:24:40 +03:00
}
2018-05-15 21:23:46 +03:00
if(outputBufferSize==0){
outputBufferSize=len;
int packetsNeeded;
if(len>PACKET_SIZE)
packetsNeeded=len/PACKET_SIZE;
2017-02-02 19:24:40 +03:00
else
2018-05-15 21:23:46 +03:00
packetsNeeded=1;
packetsNeeded*=2;
semaphore->Release(packetsNeeded);
2017-02-02 19:24:40 +03:00
}
2018-05-15 21:23:46 +03:00
assert(outputBufferSize==len && "output buffer size is supposed to be the same throughout callbacks");
2017-02-02 19:24:40 +03:00
if(len==PACKET_SIZE){
2018-05-15 21:23:46 +03:00
lastDecoded=(unsigned char *) decodedQueue->GetBlocking();
if(!lastDecoded)
return 0;
memcpy(data, lastDecoded, PACKET_SIZE);
bufferPool->Reuse(lastDecoded);
semaphore->Release();
if(silentPacketCount>0){
silentPacketCount--;
if(levelMeter)
levelMeter->Update(reinterpret_cast<int16_t *>(data), 0);
return 0;
}
if(echoCanceller){
echoCanceller->SpeakerOutCallback(data, PACKET_SIZE);
2017-02-02 19:24:40 +03:00
}
2018-05-15 21:23:46 +03:00
}else{
LOGE("Opus decoder buffer length != 960 samples");
abort();
2017-02-02 19:24:40 +03:00
}
}else{
2018-05-15 21:23:46 +03:00
if(remainingDataLen==0 && silentPacketCount==0){
int duration=DecodeNextFrame();
remainingDataLen=(size_t) (duration/20*960*2);
}
if(silentPacketCount>0 || remainingDataLen==0 || !processedBuffer){
if(silentPacketCount>0)
silentPacketCount--;
memset(data, 0, 960*2);
if(levelMeter)
levelMeter->Update(reinterpret_cast<int16_t *>(data), 0);
return 0;
}
memcpy(data, processedBuffer, 960*2);
remainingDataLen-=960*2;
if(remainingDataLen>0){
memmove(processedBuffer, processedBuffer+960*2, remainingDataLen);
}
}
if(levelMeter)
levelMeter->Update(reinterpret_cast<int16_t *>(data), len/2);
return len;
2017-02-02 19:24:40 +03:00
}
void tgvoip::OpusDecoder::Start(){
2018-05-15 21:23:46 +03:00
if(!async)
return;
2017-02-02 19:24:40 +03:00
running=true;
2018-05-15 21:23:46 +03:00
thread=new Thread(new MethodPointer<tgvoip::OpusDecoder>(&tgvoip::OpusDecoder::RunThread, this), NULL);
thread->SetName("opus_decoder");
thread->SetMaxPriority();
thread->Start();
2017-02-02 19:24:40 +03:00
}
void tgvoip::OpusDecoder::Stop(){
2018-05-15 21:23:46 +03:00
if(!running || !async)
2017-02-02 19:24:40 +03:00
return;
running=false;
2018-05-15 21:23:46 +03:00
semaphore->Release();
thread->Join();
delete thread;
2017-02-02 19:24:40 +03:00
}
2018-05-15 21:23:46 +03:00
void tgvoip::OpusDecoder::RunThread(void* param){
2017-02-02 19:24:40 +03:00
int i;
LOGI("decoder: packets per frame %d", packetsPerFrame);
while(running){
2018-05-15 21:23:46 +03:00
int playbackDuration=DecodeNextFrame();
for(i=0;i<playbackDuration/20;i++){
semaphore->Acquire();
2017-04-28 15:42:48 +03:00
if(!running){
LOGI("==== decoder exiting ====");
return;
}
2017-02-02 19:24:40 +03:00
unsigned char *buf=bufferPool->Get();
if(buf){
2018-05-15 21:23:46 +03:00
if(remainingDataLen>0){
for(std::vector<AudioEffect*>::iterator effect=postProcEffects.begin();effect!=postProcEffects.end();++effect){
(*effect)->Process(reinterpret_cast<int16_t*>(processedBuffer+(PACKET_SIZE*i)), 960);
}
memcpy(buf, processedBuffer+(PACKET_SIZE*i), PACKET_SIZE);
2017-02-02 19:24:40 +03:00
}else{
2018-05-15 21:23:46 +03:00
//LOGE("Error decoding, result=%d", size);
2017-02-02 19:24:40 +03:00
memset(buf, 0, PACKET_SIZE);
}
decodedQueue->Put(buf);
}else{
LOGW("decoder: no buffers left!");
}
}
}
}
2018-05-15 21:23:46 +03:00
int tgvoip::OpusDecoder::DecodeNextFrame(){
int playbackDuration=0;
2018-06-04 22:37:43 +03:00
bool isEC=false;
size_t len=jitterBuffer->HandleOutput(buffer, 8192, 0, true, playbackDuration, isEC);
2018-05-15 21:23:46 +03:00
bool fec=false;
if(!len){
fec=true;
2018-06-04 22:37:43 +03:00
len=jitterBuffer->HandleOutput(buffer, 8192, 0, false, playbackDuration, isEC);
//if(len)
// LOGV("Trying FEC...");
2018-05-15 21:23:46 +03:00
}
int size;
if(len){
2018-06-04 22:37:43 +03:00
size=opus_decode(isEC ? ecDec : dec, buffer, len, (opus_int16 *) decodeBuffer, packetsPerFrame*960, fec ? 1 : 0);
2018-05-15 21:23:46 +03:00
consecutiveLostPackets=0;
2018-06-04 22:37:43 +03:00
if(prevWasEC!=isEC && size){
// It turns out the waveforms generated by the PLC feature are also great to help smooth out the
// otherwise audible transition between the frames from different decoders. Those are basically an extrapolation
// of the previous successfully decoded data -- which is exactly what we need here.
size=opus_decode(prevWasEC ? ecDec : dec, NULL, 0, (opus_int16*)nextBuffer, packetsPerFrame*960, 0);
if(size){
int16_t* plcSamples=reinterpret_cast<int16_t*>(nextBuffer);
int16_t* samples=reinterpret_cast<int16_t*>(decodeBuffer);
constexpr float coeffs[]={0.999802, 0.995062, 0.984031, 0.966778, 0.943413, 0.914084, 0.878975, 0.838309, 0.792344,
0.741368, 0.685706, 0.625708, 0.561754, 0.494249, 0.423619, 0.350311, 0.274788, 0.197527, 0.119018, 0.039757};
for(int i=0;i<20;i++){
samples[i]=(int16_t)round((plcSamples[i]*coeffs[i]+(float)samples[i]*(1.0-coeffs[i])));
}
}
}
prevWasEC=isEC;
prevLastSample=decodeBuffer[size-1];
2018-05-15 21:23:46 +03:00
}else{ // do packet loss concealment
consecutiveLostPackets++;
if(consecutiveLostPackets>2 && enableDTX){
silentPacketCount+=packetsPerFrame;
size=packetsPerFrame*960;
}else{
2018-06-04 22:37:43 +03:00
size=opus_decode(prevWasEC ? ecDec : dec, NULL, 0, (opus_int16 *) decodeBuffer, packetsPerFrame*960, 0);
2018-05-15 21:23:46 +03:00
//LOGV("PLC");
}
}
if(size<0)
LOGW("decoder: opus_decode error %d", size);
remainingDataLen=size;
if(playbackDuration==80){
processedBuffer=buffer;
audio::Resampler::Rescale60To80((int16_t*) decodeBuffer, (int16_t*) processedBuffer);
}else if(playbackDuration==40){
processedBuffer=buffer;
audio::Resampler::Rescale60To40((int16_t*) decodeBuffer, (int16_t*) processedBuffer);
}else{
processedBuffer=decodeBuffer;
}
return playbackDuration;
}
2017-02-02 19:24:40 +03:00
void tgvoip::OpusDecoder::SetFrameDuration(uint32_t duration){
2017-02-02 19:24:40 +03:00
frameDuration=duration;
2018-05-15 21:23:46 +03:00
packetsPerFrame=frameDuration/20;
2017-02-02 19:24:40 +03:00
}
2018-06-04 22:37:43 +03:00
void tgvoip::OpusDecoder::SetJitterBuffer(std::shared_ptr<JitterBuffer> jitterBuffer){
2018-05-15 21:23:46 +03:00
this->jitterBuffer=jitterBuffer;
2017-02-02 19:24:40 +03:00
}
2018-05-15 21:23:46 +03:00
void tgvoip::OpusDecoder::SetDTX(bool enable){
enableDTX=enable;
}
2017-02-02 19:24:40 +03:00
2018-05-15 21:23:46 +03:00
// Sets the level meter that HandleCallback() updates with each output buffer.
// The pointer is only stored, not owned; pass NULL to detach.
void tgvoip::OpusDecoder::SetLevelMeter(AudioLevelMeter *levelMeter){
	this->levelMeter=levelMeter;
}
// Appends an effect to the end of the post-processing chain that RunThread()
// applies to every decoded chunk. The pointer is stored as-is.
void tgvoip::OpusDecoder::AddAudioEffect(AudioEffect *effect){
	postProcEffects.insert(postProcEffects.end(), effect);
}
// Removes the first occurrence of the given effect from the post-processing
// chain. Does nothing when the effect is not in the chain.
void tgvoip::OpusDecoder::RemoveAudioEffect(AudioEffect *effect){
	auto firstMatch=std::find(postProcEffects.begin(), postProcEffects.end(), effect);
	if(firstMatch==postProcEffects.end())
		return;
	postProcEffects.erase(firstMatch);
}