mirror of
https://github.com/danog/libtgvoip.git
synced 2025-01-09 14:18:24 +01:00
176 lines
6.3 KiB
C++
176 lines
6.3 KiB
C++
|
/*
|
||
|
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||
|
*
|
||
|
* Use of this source code is governed by a BSD-style license
|
||
|
* that can be found in the LICENSE file in the root of the source
|
||
|
* tree. An additional intellectual property rights grant can be found
|
||
|
* in the file PATENTS. All contributing project authors may
|
||
|
* be found in the AUTHORS file in the root of the source tree.
|
||
|
*/
|
||
|
|
||
|
#include "modules/audio_processing/transient/transient_detector.h"
|
||
|
|
||
|
#include <float.h>
|
||
|
#include <math.h>
|
||
|
#include <string.h>
|
||
|
#include <algorithm>
|
||
|
|
||
|
#include "modules/audio_processing/transient/common.h"
|
||
|
#include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
|
||
|
#include "modules/audio_processing/transient/moving_moments.h"
|
||
|
#include "modules/audio_processing/transient/wpd_node.h"
|
||
|
#include "modules/audio_processing/transient/wpd_tree.h"
|
||
|
#include "rtc_base/checks.h"
|
||
|
|
||
|
namespace webrtc {
|
||
|
|
||
|
static const int kTransientLengthMs = 30;
|
||
|
static const int kChunksAtStartupLeftToDelete =
|
||
|
kTransientLengthMs / ts::kChunkSizeMs;
|
||
|
static const float kDetectThreshold = 16.f;
|
||
|
|
||
|
// Builds a detector for audio sampled at |sample_rate_hz|, which must be one
// of the rates accepted by the RTC_DCHECK below. Sets up the wavelet packet
// tree, one MovingMoments instance per leaf, the scratch buffers that receive
// the moments, and a zero-filled history of previous results.
TransientDetector::TransientDetector(int sample_rate_hz)
    : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
      last_first_moment_(),
      last_second_moment_(),
      chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
      reference_energy_(1.f),
      using_reference_(false) {
  RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz ||
             sample_rate_hz == ts::kSampleRate16kHz ||
             sample_rate_hz == ts::kSampleRate32kHz ||
             sample_rate_hz == ts::kSampleRate48kHz);

  // Round both the chunk length and the transient length down to a multiple
  // of |kLeaves| so that no samples are lost while downsampling.
  samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
  const int unaligned_transient_samples =
      sample_rate_hz * kTransientLengthMs / 1000;
  const int transient_samples =
      unaligned_transient_samples - unaligned_transient_samples % kLeaves;

  // Each leaf of the tree carries an equal share of the chunk.
  tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
  wpd_tree_.reset(new WPDTree(samples_per_chunk_,
                              kDaubechies8HighPassCoefficients,
                              kDaubechies8LowPassCoefficients,
                              kDaubechies8CoefficientsLength, kLevels));

  // One moving-moments tracker per leaf, each spanning the per-leaf share of
  // the transient window.
  const auto window_length = transient_samples / kLeaves;
  for (size_t leaf = 0; leaf < kLeaves; ++leaf) {
    moving_moments_[leaf].reset(new MovingMoments(window_length));
  }

  // Scratch buffers reused by every call to Detect().
  first_moments_.reset(new float[tree_leaves_data_length_]);
  second_moments_.reset(new float[tree_leaves_data_length_]);

  // Start with a history of zeros, one entry per chunk of the window.
  previous_results_.assign(kChunksAtStartupLeftToDelete, 0.f);
}
|
||
|
|
||
|
// Nothing to release by hand here; the members clean up after themselves.
TransientDetector::~TransientDetector() = default;
|
||
|
|
||
|
float TransientDetector::Detect(const float* data,
|
||
|
size_t data_length,
|
||
|
const float* reference_data,
|
||
|
size_t reference_length) {
|
||
|
RTC_DCHECK(data);
|
||
|
RTC_DCHECK_EQ(samples_per_chunk_, data_length);
|
||
|
|
||
|
// TODO(aluebs): Check if these errors can logically happen and if not assert
|
||
|
// on them.
|
||
|
if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
|
||
|
return -1.f;
|
||
|
}
|
||
|
|
||
|
float result = 0.f;
|
||
|
|
||
|
for (size_t i = 0; i < kLeaves; ++i) {
|
||
|
WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
|
||
|
|
||
|
moving_moments_[i]->CalculateMoments(leaf->data(), tree_leaves_data_length_,
|
||
|
first_moments_.get(),
|
||
|
second_moments_.get());
|
||
|
|
||
|
// Add value delayed (Use the last moments from the last call to Detect).
|
||
|
float unbiased_data = leaf->data()[0] - last_first_moment_[i];
|
||
|
result +=
|
||
|
unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
|
||
|
|
||
|
// Add new values.
|
||
|
for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
|
||
|
unbiased_data = leaf->data()[j] - first_moments_[j - 1];
|
||
|
result +=
|
||
|
unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
|
||
|
}
|
||
|
|
||
|
last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
|
||
|
last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
|
||
|
}
|
||
|
|
||
|
result /= tree_leaves_data_length_;
|
||
|
|
||
|
result *= ReferenceDetectionValue(reference_data, reference_length);
|
||
|
|
||
|
if (chunks_at_startup_left_to_delete_ > 0) {
|
||
|
chunks_at_startup_left_to_delete_--;
|
||
|
result = 0.f;
|
||
|
}
|
||
|
|
||
|
if (result >= kDetectThreshold) {
|
||
|
result = 1.f;
|
||
|
} else {
|
||
|
// Get proportional value.
|
||
|
// Proportion achieved with a squared raised cosine function with domain
|
||
|
// [0, kDetectThreshold) and image [0, 1), it's always increasing.
|
||
|
const float horizontal_scaling = ts::kPi / kDetectThreshold;
|
||
|
const float kHorizontalShift = ts::kPi;
|
||
|
const float kVerticalScaling = 0.5f;
|
||
|
const float kVerticalShift = 1.f;
|
||
|
|
||
|
result =
|
||
|
(cos(result * horizontal_scaling + kHorizontalShift) + kVerticalShift) *
|
||
|
kVerticalScaling;
|
||
|
result *= result;
|
||
|
}
|
||
|
|
||
|
previous_results_.pop_front();
|
||
|
previous_results_.push_back(result);
|
||
|
|
||
|
// In the current implementation we return the max of the current result and
|
||
|
// the previous results, so the high results have a width equals to
|
||
|
// |transient_length|.
|
||
|
return *std::max_element(previous_results_.begin(), previous_results_.end());
|
||
|
}
|
||
|
|
||
|
// Looks for the highest slope and compares it with the previous ones.
|
||
|
// An exponential transformation takes this to the [0, 1] range. This value is
|
||
|
// multiplied by the detection result to avoid false positives.
|
||
|
float TransientDetector::ReferenceDetectionValue(const float* data,
|
||
|
size_t length) {
|
||
|
if (data == NULL) {
|
||
|
using_reference_ = false;
|
||
|
return 1.f;
|
||
|
}
|
||
|
static const float kEnergyRatioThreshold = 0.2f;
|
||
|
static const float kReferenceNonLinearity = 20.f;
|
||
|
static const float kMemory = 0.99f;
|
||
|
float reference_energy = 0.f;
|
||
|
for (size_t i = 1; i < length; ++i) {
|
||
|
reference_energy += data[i] * data[i];
|
||
|
}
|
||
|
if (reference_energy == 0.f) {
|
||
|
using_reference_ = false;
|
||
|
return 1.f;
|
||
|
}
|
||
|
RTC_DCHECK_NE(0, reference_energy_);
|
||
|
float result = 1.f / (1.f + exp(kReferenceNonLinearity *
|
||
|
(kEnergyRatioThreshold -
|
||
|
reference_energy / reference_energy_)));
|
||
|
reference_energy_ =
|
||
|
kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
|
||
|
|
||
|
using_reference_ = true;
|
||
|
|
||
|
return result;
|
||
|
}
|
||
|
|
||
|
} // namespace webrtc
|