1
0
mirror of https://github.com/danog/libtgvoip.git synced 2024-12-11 16:49:52 +01:00
libtgvoip/controller/audio/OpusDecoder.cpp

330 lines
8.3 KiB
C++
Raw Normal View History

2020-01-22 12:43:51 +01:00
//
// libtgvoip is free and unencumbered public domain software.
// For more information, see http://unlicense.org or the UNLICENSE file
// you should have received with this source code distribution.
//
2020-01-22 12:51:17 +01:00
#include "controller/audio/OpusDecoder.h"
2020-01-22 12:43:51 +01:00
#include "audio/Resampler.h"
#include "tools/logging.h"
#include <assert.h>
#include <math.h>
#include <algorithm>
#include "VoIPController.h"
#define PACKET_SIZE (960 * 2)
using namespace tgvoip;
/// Constructs a decoder attached to a shared output stream.
/// Registers the static Callback trampoline (with `this` as its parameter) on
/// `dst`, then sets up all decoder state via Initialize().
tgvoip::OpusDecoder::OpusDecoder(const std::shared_ptr<MediaStreamItf> &dst, bool isAsync, bool needEC)
{
    MediaStreamItf &stream = *dst;
    stream.SetCallback(OpusDecoder::Callback, this);
    Initialize(isAsync, needEC);
}
/// Constructs a decoder attached to a uniquely-owned output stream.
/// Registers the static Callback trampoline (with `this` as its parameter) on
/// `dst`, then sets up all decoder state via Initialize().
tgvoip::OpusDecoder::OpusDecoder(const std::unique_ptr<MediaStreamItf> &dst, bool isAsync, bool needEC)
{
    MediaStreamItf &stream = *dst;
    stream.SetCallback(OpusDecoder::Callback, this);
    Initialize(isAsync, needEC);
}
void tgvoip::OpusDecoder::Initialize(bool isAsync, bool needEC)
{
async = isAsync;
if (async)
{
decodedQueue = new BlockingQueue<Buffer>(33);
semaphore = new Semaphore(32, 0);
}
else
{
decodedQueue = NULL;
semaphore = NULL;
}
dec = opus_decoder_create(48000, 1, NULL);
if (needEC)
ecDec = opus_decoder_create(48000, 1, NULL);
else
ecDec = NULL;
2020-03-10 20:31:58 +01:00
buffer = reinterpret_cast<unsigned char*>(aligned_alloc(2, 8192));
2020-01-22 12:43:51 +01:00
lastDecoded = NULL;
outputBufferSize = 0;
echoCanceller = NULL;
frameDuration = 20;
consecutiveLostPackets = 0;
enableDTX = false;
silentPacketCount = 0;
levelMeter = NULL;
nextLen = 0;
running = false;
remainingDataLen = 0;
processedBuffer = NULL;
prevWasEC = false;
prevLastSample = 0;
}
/// Releases the Opus decoder instances, the packet scratch buffer and the
/// async-mode queue and semaphore.
tgvoip::OpusDecoder::~OpusDecoder()
{
    opus_decoder_destroy(dec);
    if (ecDec != NULL)
    {
        opus_decoder_destroy(ecDec);
    }
    free(buffer);
    // Both pointers are NULL in synchronous mode; deleting a null pointer is a no-op.
    delete decodedQueue;
    delete semaphore;
}
2020-01-24 20:26:34 +01:00
void tgvoip::OpusDecoder::SetEchoCanceller(const std::shared_ptr<EchoCanceller> &canceller)
2020-01-22 12:43:51 +01:00
{
echoCanceller = canceller;
}
size_t tgvoip::OpusDecoder::Callback(unsigned char *data, size_t len, void *param)
{
2020-03-10 20:31:58 +01:00
return reinterpret_cast<OpusDecoder *>(param)->HandleCallback(data, len);
2020-01-22 12:43:51 +01:00
}
size_t tgvoip::OpusDecoder::HandleCallback(unsigned char *data, size_t len)
{
if (async)
{
if (!running)
{
memset(data, 0, len);
return 0;
}
if (outputBufferSize == 0)
{
outputBufferSize = len;
int packetsNeeded;
if (len > PACKET_SIZE)
packetsNeeded = len / PACKET_SIZE;
else
packetsNeeded = 1;
packetsNeeded *= 2;
semaphore->Release(packetsNeeded);
}
assert(outputBufferSize == len && "output buffer size is supposed to be the same throughout callbacks");
if (len == PACKET_SIZE)
{
Buffer lastDecoded = decodedQueue->GetBlocking();
if (lastDecoded.IsEmpty())
return 0;
memcpy(data, *lastDecoded, PACKET_SIZE);
semaphore->Release();
if (silentPacketCount > 0)
{
silentPacketCount--;
if (levelMeter)
levelMeter->Update(reinterpret_cast<int16_t *>(data), 0);
return 0;
}
if (echoCanceller)
{
echoCanceller->SpeakerOutCallback(data, PACKET_SIZE);
}
}
else
{
LOGE("Opus decoder buffer length != 960 samples");
abort();
}
}
else
{
if (remainingDataLen == 0 && silentPacketCount == 0)
{
int duration = DecodeNextFrame();
remainingDataLen = (size_t)(duration / 20 * 960 * 2);
}
if (silentPacketCount > 0 || remainingDataLen == 0 || !processedBuffer)
{
if (silentPacketCount > 0)
silentPacketCount--;
memset(data, 0, 960 * 2);
if (levelMeter)
levelMeter->Update(reinterpret_cast<int16_t *>(data), 0);
return 0;
}
memcpy(data, processedBuffer, 960 * 2);
remainingDataLen -= 960 * 2;
if (remainingDataLen > 0)
{
memmove(processedBuffer, processedBuffer + 960 * 2, remainingDataLen);
}
}
if (levelMeter)
levelMeter->Update(reinterpret_cast<int16_t *>(data), len / 2);
return len;
}
/// Starts the decoder thread. Does nothing in synchronous mode.
void tgvoip::OpusDecoder::Start()
{
    if (async)
    {
        running = true;
        thread = new Thread(std::bind(&tgvoip::OpusDecoder::RunThread, this));
        thread->SetName("opus_decoder");
        thread->SetMaxPriority();
        thread->Start();
    }
}
/// Stops and joins the decoder thread. Does nothing unless the decoder is
/// async and currently running.
void tgvoip::OpusDecoder::Stop()
{
    if (running && async)
    {
        running = false;
        // Wake the decoder thread in case it is waiting in semaphore->Acquire().
        semaphore->Release();
        thread->Join();
        delete thread;
    }
}
void tgvoip::OpusDecoder::RunThread()
{
int i;
LOGI("decoder: packets per frame %d", packetsPerFrame);
while (running)
{
int playbackDuration = DecodeNextFrame();
for (i = 0; i < playbackDuration / 20; i++)
{
semaphore->Acquire();
if (!running)
{
LOGI("==== decoder exiting ====");
return;
}
try
{
Buffer buf = bufferPool.Get();
if (remainingDataLen > 0)
{
2020-01-24 20:26:34 +01:00
for (auto &effect : postProcEffects)
2020-01-22 12:43:51 +01:00
{
effect->Process(reinterpret_cast<int16_t *>(processedBuffer + (PACKET_SIZE * i)), 960);
}
buf.CopyFrom(processedBuffer + (PACKET_SIZE * i), 0, PACKET_SIZE);
}
else
{
//LOGE("Error decoding, result=%d", size);
memset(*buf, 0, PACKET_SIZE);
}
decodedQueue->Put(std::move(buf));
}
catch (std::bad_alloc &x)
{
LOGW("decoder: no buffers left!");
}
}
}
}
int tgvoip::OpusDecoder::DecodeNextFrame()
{
int playbackDuration = 0;
bool isEC = false;
2020-03-13 21:12:58 +01:00
size_t len = jitterBuffer->HandleOutput(buffer, 8192, true, playbackDuration, isEC);
2020-01-22 12:43:51 +01:00
bool fec = false;
if (!len)
{
fec = true;
2020-03-13 21:12:58 +01:00
len = jitterBuffer->HandleOutput(buffer, 8192, false, playbackDuration, isEC);
2020-01-29 20:11:39 +01:00
/*if (len)
LOGV("Trying FEC...");*/
2020-01-22 12:43:51 +01:00
}
int size;
if (len)
{
size = opus_decode(isEC ? ecDec : dec, buffer, len, (opus_int16 *)decodeBuffer, packetsPerFrame * 960, fec ? 1 : 0);
consecutiveLostPackets = 0;
if (prevWasEC != isEC && size)
{
// It turns out the waveforms generated by the PLC feature are also great to help smooth out the
// otherwise audible transition between the frames from different decoders. Those are basically an extrapolation
// of the previous successfully decoded data -- which is exactly what we need here.
2020-03-10 20:31:58 +01:00
size = opus_decode(prevWasEC ? ecDec : dec, NULL, 0, reinterpret_cast<opus_int16*>(nextBuffer), packetsPerFrame * 960, 0);
if (size) {
int16_t* plcSamples = reinterpret_cast<int16_t*>(nextBuffer);
int16_t* samples = reinterpret_cast<int16_t*>(decodeBuffer);
constexpr float coeffs[] = {0.999802f, 0.995062f, 0.984031f, 0.966778f, 0.943413f, 0.914084f, 0.878975f, 0.838309f, 0.792344f, 0.741368f,
0.685706f, 0.625708f, 0.561754f, 0.494249f, 0.423619f, 0.350311f, 0.274788f, 0.197527f, 0.119018f, 0.039757f};
2020-03-13 16:47:13 +01:00
// But why 20 samples? Typically the sample size is 60....
2020-03-10 20:31:58 +01:00
for (int i = 0; i < 20; i++) {
samples[i] = static_cast<int16_t>(round(plcSamples[i] * coeffs[i] + samples[i] * (1.f - coeffs[i])));
2020-01-22 12:43:51 +01:00
}
}
}
prevWasEC = isEC;
prevLastSample = decodeBuffer[size - 1];
}
else
{ // do packet loss concealment
2020-03-13 16:47:13 +01:00
if (++consecutiveLostPackets > 2 && enableDTX)
2020-01-22 12:43:51 +01:00
{
silentPacketCount += packetsPerFrame;
size = packetsPerFrame * 960;
}
else
{
size = opus_decode(prevWasEC ? ecDec : dec, NULL, 0, (opus_int16 *)decodeBuffer, packetsPerFrame * 960, 0);
//LOGV("PLC");
}
}
if (size < 0)
LOGW("decoder: opus_decode error %d", size);
remainingDataLen = size;
if (playbackDuration == 80)
{
2020-01-29 20:11:39 +01:00
//LOGW("Rescaling to 80");
2020-01-22 12:43:51 +01:00
processedBuffer = buffer;
audio::Resampler::Rescale60To80((int16_t *)decodeBuffer, (int16_t *)processedBuffer);
}
else if (playbackDuration == 40)
{
2020-01-29 20:11:39 +01:00
//LOGW("Rescaling to 40");
2020-01-22 12:43:51 +01:00
processedBuffer = buffer;
audio::Resampler::Rescale60To40((int16_t *)decodeBuffer, (int16_t *)processedBuffer);
}
else
{
processedBuffer = decodeBuffer;
}
return playbackDuration;
}
void tgvoip::OpusDecoder::SetFrameDuration(uint32_t duration)
{
frameDuration = duration;
packetsPerFrame = frameDuration / 20;
}
2020-01-24 20:26:34 +01:00
void tgvoip::OpusDecoder::SetJitterBuffer(const std::shared_ptr<JitterBuffer> &jitterBuffer)
2020-01-22 12:43:51 +01:00
{
this->jitterBuffer = jitterBuffer;
}
void tgvoip::OpusDecoder::SetDTX(bool enable)
{
enableDTX = enable;
}
2020-01-24 20:26:34 +01:00
void tgvoip::OpusDecoder::SetLevelMeter(const std::shared_ptr<AudioLevelMeter> &levelMeter)
2020-01-22 12:43:51 +01:00
{
this->levelMeter = levelMeter;
}
2020-01-24 20:26:34 +01:00
/// Appends `effect` to the list of post-processing effects that RunThread()
/// applies to each decoded chunk.
void tgvoip::OpusDecoder::AddAudioEffect(const std::shared_ptr<effects::AudioEffect> &effect)
{
    postProcEffects.emplace_back(effect);
}
2020-01-24 20:26:34 +01:00
/// Removes the first occurrence of `effect` from the post-processing list.
/// Does nothing if the effect is not in the list.
void tgvoip::OpusDecoder::RemoveAudioEffect(const std::shared_ptr<effects::AudioEffect> &effect)
{
    const auto last = postProcEffects.end();
    const auto match = std::find(postProcEffects.begin(), last, effect);
    if (match == last)
    {
        return;
    }
    postProcEffects.erase(match);
}