From 17ef5b1c75ddc81e980d56927c4a1c3b5d71745f Mon Sep 17 00:00:00 2001 From: Max Maisel <36964161+mmmaisel@users.noreply.github.com> Date: Fri, 24 Jan 2020 19:04:19 +0100 Subject: [PATCH] Fix two bugs in loudness effect (#410) * Calculate loudness for short or silent selections as well. In case of selections shorter than 400ms (one momentary loudness block), take what we have got and divide only by the actual length. Abort loudness normalization silently if the selected audio is all silent. * Fix loudness effect bug when selection includes a gap. If the selected audio in a track contained a gap between two clips, an incorrect amount of samples was processed. --- src/effects/EBUR128.cpp | 100 ++++++++++++++++++++--------------- src/effects/EBUR128.h | 3 ++ src/effects/Loudness.cpp | 24 ++++++--- tests/octave/loudness_test.m | 22 +++++++- 4 files changed, 96 insertions(+), 53 deletions(-) diff --git a/src/effects/EBUR128.cpp b/src/effects/EBUR128.cpp index 59c2b955f..7978380c8 100644 --- a/src/effects/EBUR128.cpp +++ b/src/effects/EBUR128.cpp @@ -107,35 +107,9 @@ void EBUR128::NextSample() if(mBlockRingPos % mBlockOverlap == 0) { - // Process new full block. As incomplete blocks shall be discarded - // according to the EBU R128 specification there is no need for - // some special logic for the last blocks. + // A new full block of samples was submitted. if(mBlockRingSize >= mBlockSize) - { - // Reset mBlockRingSize to full state to avoid overflow. - // The actual value of mBlockRingSize does not matter - // since this is only used to detect if blocks are complete (>= mBlockSize). - mBlockRingSize = mBlockSize; - - size_t idx; - double blockVal = 0; - for(size_t i = 0; i < mBlockSize; ++i) - blockVal += mBlockRingBuffer[i]; - - // Histogram values are simplified log10() immediate values - // without -0.691 + 10*(...) to safe computing power. This is - // possible because these constant cancel out anyway during the - // following processing steps. 
- blockVal = log10(blockVal/double(mBlockSize)); - // log(blockVal) is within ]-inf, 1] - idx = round((blockVal - GAMMA_A) * double(HIST_BIN_COUNT) / -GAMMA_A - 1); - - // idx is within ]-inf, HIST_BIN_COUNT-1], discard indices below 0 - // as they are below the EBU R128 absolute threshold anyway. - if(// idx >= 0 && - idx < HIST_BIN_COUNT) - ++mLoudnessHist[idx]; - } + AddBlockToHistogram(mBlockSize); } // Close the ring. if(mBlockRingPos == mBlockSize) @@ -148,14 +122,15 @@ double EBUR128::IntegrativeLoudness() // EBU R128: z_i = mean square without root // Calculate Gamma_R from histogram. - double sum_v = 0; - double val; - long int sum_c = 0; - for(size_t i = 0; i < HIST_BIN_COUNT; ++i) + double sum_v; + long int sum_c; + HistogramSums(0, sum_v, sum_c); + + // Handle incomplete block if no non-zero block was found. + if(sum_c == 0) { - val = -GAMMA_A / double(HIST_BIN_COUNT) * (i+1) + GAMMA_A; - sum_v += pow(10, val) * mLoudnessHist[i]; - sum_c += mLoudnessHist[i]; + AddBlockToHistogram(mBlockRingSize); + HistogramSums(0, sum_v, sum_c); } // Histogram values are simplified log(x^2) immediate values @@ -167,15 +142,54 @@ double EBUR128::IntegrativeLoudness() size_t idx_R = round((Gamma_R - GAMMA_A) * double(HIST_BIN_COUNT) / -GAMMA_A - 1); // Apply Gamma_R threshold and calculate gated loudness (extent). - sum_v = 0; - sum_c = 0; - for(size_t i = idx_R+1; i < HIST_BIN_COUNT; ++i) - { - val = -GAMMA_A / double(HIST_BIN_COUNT) * (i+1) + GAMMA_A; - sum_v += pow(10, val) * mLoudnessHist[i]; - sum_c += mLoudnessHist[i]; - } + HistogramSums(idx_R+1, sum_v, sum_c); + if(sum_c == 0) + // Silence was processed. 
+ return 0; // LUFS is defined as -0.691 dB + 10*log10(sum(channels)) return 0.8529037031 * sum_v / sum_c; } +void EBUR128::HistogramSums(size_t start_idx, double& sum_v, long int& sum_c) +{ + double val; + sum_v = 0; + sum_c = 0; + for(size_t i = start_idx; i < HIST_BIN_COUNT; ++i) + { + val = -GAMMA_A / double(HIST_BIN_COUNT) * (i+1) + GAMMA_A; + sum_v += pow(10, val) * mLoudnessHist[i]; + sum_c += mLoudnessHist[i]; + } +} + +/// Process new full block. Incomplete blocks shall be discarded +/// according to the EBU R128 specification there is usually no need +/// to call this on the last block. +/// However, allow to override the block size if the audio to be +/// processed is shorter than one block. +void EBUR128::AddBlockToHistogram(size_t validLen) +{ + // Reset mBlockRingSize to full state to avoid overflow. + // The actual value of mBlockRingSize does not matter + // since this is only used to detect if blocks are complete (>= mBlockSize). + mBlockRingSize = mBlockSize; + + size_t idx; + double blockVal = 0; + for(size_t i = 0; i < validLen; ++i) + blockVal += mBlockRingBuffer[i]; + + // Histogram values are simplified log10() immediate values + // without -0.691 + 10*(...) to safe computing power. This is + // possible because these constant cancel out anyway during the + // following processing steps. + blockVal = log10(blockVal/double(validLen)); + // log(blockVal) is within ]-inf, 1] + idx = round((blockVal - GAMMA_A) * double(HIST_BIN_COUNT) / -GAMMA_A - 1); + + // idx is within ]-inf, HIST_BIN_COUNT-1], discard indices below 0 + // as they are below the EBU R128 absolute threshold anyway. 
+ if(idx < HIST_BIN_COUNT) + ++mLoudnessHist[idx]; +} diff --git a/src/effects/EBUR128.h b/src/effects/EBUR128.h index 883a61d94..4148f4c35 100644 --- a/src/effects/EBUR128.h +++ b/src/effects/EBUR128.h @@ -33,6 +33,9 @@ public: { return 10 * log10(loudness); } private: + void HistogramSums(size_t start_idx, double& sum_v, long int& sum_c); + void AddBlockToHistogram(size_t validLen); + static const size_t HIST_BIN_COUNT = 65536; /// EBU R128 absolute threshold static constexpr double GAMMA_A = (-70.0 + 0.691) / 10.0; diff --git a/src/effects/Loudness.cpp b/src/effects/Loudness.cpp index c68a60936..ef968bbf8 100644 --- a/src/effects/Loudness.cpp +++ b/src/effects/Loudness.cpp @@ -252,6 +252,13 @@ bool EffectLoudness::Process() // RMS: use average RMS, average must be calculated in quadratic domain. extent = sqrt((mRMS[0] * mRMS[0] + mRMS[1] * mRMS[1]) / 2.0); } + + if(extent == 0.0) + { + mLoudnessProcessor.reset(); + FreeBuffers(); + return false; + } mMult = mRatio / extent; if(mNormalizeTo == kLoudness) @@ -461,10 +468,8 @@ bool EffectLoudness::ProcessOne(TrackIterRange range, bool analyse) { if(!ProcessBufferBlock()) return false; - } - - if(!analyse) StoreBufferBlock(range, s, blockLen); + } // Increment s one blockfull of samples s += blockLen; @@ -478,21 +483,24 @@ bool EffectLoudness::LoadBufferBlock(TrackIterRange range, sampleCount pos, size_t len) { sampleCount read_size = -1; + sampleCount last_read_size = -1; // Get the samples from the track and put them in the buffer int idx = 0; for(auto channel : range) { channel->Get((samplePtr) mTrackBuffer[idx].get(), floatSample, pos, len, fillZero, true, &read_size); - mTrackBufferLen = read_size.as_size_t(); - + // WaveTrack::Get returns the amount of read samples excluding zero + // filled samples from clip gaps. But in case of stereo tracks with + // assymetric gaps it still returns the same number for both channels. + // // Fail if we read different sample count from stereo pair tracks. 
- // Ignore this check during first iteration (read_size == -1). - if(read_size.as_size_t() != mTrackBufferLen && read_size != -1) + // Ignore this check during first iteration (last_read_size == -1). + if(read_size != last_read_size && last_read_size.as_long_long() != -1) return false; - ++idx; } + mTrackBufferLen = len; return true; } diff --git a/tests/octave/loudness_test.m b/tests/octave/loudness_test.m index 4ab66f204..43a9ea74d 100644 --- a/tests/octave/loudness_test.m +++ b/tests/octave/loudness_test.m @@ -122,11 +122,29 @@ if TEST_LUFS_HELPER printf("LUFS-selftest3.wav should be %f LUFS\n", calc_LUFS(x, fs)); end +## Test Loudness LUFS mode: block to short and all silent +CURRENT_TEST = "Loudness LUFS mode, short silent block"; +fs= 44100; +x = zeros(ceil(fs*0.35), 2); +audiowrite(TMP_FILENAME, x, fs); +if EXPORT_TEST_SIGNALS + audiowrite(cstrcat(pwd(), "/Loudness-LUFS-silence-test.wav"), x, fs); +end + +remove_all_tracks(); +aud_do(cstrcat("Import2: Filename=\"", TMP_FILENAME, "\"\n")); +select_tracks(0, 100); +aud_do("LoudnessNormalization: LUFSLevel=-23 DualMono=1 NormalizeTo=0 StereoIndependent=0\n"); +aud_do(cstrcat("Export2: Filename=\"", TMP_FILENAME, "\" NumChannels=2\n")); +system("sync"); + +y = audioread(TMP_FILENAME); +do_test_equ(y, x, "identity"); + ## Test Loudness LUFS mode: stereo dependent CURRENT_TEST = "Loudness LUFS mode, keep DC and stereo balance"; randn("seed", 1); -fs= 44100; -# Include some silecne in the test signal to test loudness gating +# Include some silence in the test signal to test loudness gating # and vary the overall loudness over time. x = [0.1*randn(15*fs, 2).', zeros(5*fs, 2).', 0.1*randn(15*fs, 2).'].'; x(:,1) = x(:,1) .* sin(2*pi/fs/35*(1:1:35*fs)).' .* 1.2;