Skip to content

Commit a57dab0

Browse files
committed
Use gain energy scale for ATRAC3 allocation
1 parent e6148b4 commit a57dab0

6 files changed

Lines changed: 242 additions & 79 deletions

File tree

src/atrac/at3/atrac3_bitstream.cpp

Lines changed: 23 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
#include <iostream>
2828
#include <vector>
2929
#include <cstdlib>
30+
#include <cmath>
3031

3132
namespace NAtracDEnc {
3233
namespace NAtrac3 {
@@ -212,26 +213,40 @@ bool ConsiderEnergyErr(const vector<float>& err, vector<uint32_t>& bits)
212213
return adjusted;
213214
}
214215

216+
// Returns `scale` unchanged when it is a usable energy ratio, i.e. finite and
// strictly positive. NaN, +/-Inf, zero and negative inputs are replaced by the
// neutral ratio 1.0f.
float SanitizeGainEnergyScale(float scale)
{
    const bool usable = std::isfinite(scale) && scale > 0.0f;
    return usable ? scale : 1.0f;
}
220+
221+
float EnergyScaleToScaleFactorOffset(float energyScale)
222+
{
223+
// ScaleFactorIndex is logarithmic in amplitude with 3 steps per octave.
224+
// The gain estimate is an energy ratio, so sqrt(energyScale) is the
225+
// corresponding amplitude ratio.
226+
return 1.5f * std::log2(SanitizeGainEnergyScale(energyScale));
227+
}
228+
215229
vector<uint32_t> CalcBitsAllocation(const TAtrac3BitStreamWriter::TSingleChannelElement& sce,
216230
const uint32_t bfuNum,
217231
const float spread,
218232
const float shift,
219233
const float loudness)
220234
{
221235
const std::vector<TScaledBlock>& scaledBlocks = sce.ScaledBlocks;
222-
const auto gainBoostPerBand = sce.GainBoostPerBand;
223236
vector<uint32_t> bitsPerEachBlock(bfuNum);
224237
for (size_t i = 0; i < bitsPerEachBlock.size(); ++i) {
225-
const float ath = ATH[i] * loudness;
226-
227238
uint32_t bfuBand = 0;
228239
for (uint32_t b = 1; b < TAtrac3Data::NumQMF; ++b) {
229240
if (i >= TAtrac3Data::BlocksPerBand[b]) {
230241
bfuBand = b;
231242
}
232243
}
233244

234-
if (scaledBlocks[i].Energy < ath) {
245+
const float gainEnergyScale = SanitizeGainEnergyScale(sce.GainEnergyScale[bfuBand].Frame);
246+
const float correctedEnergy = scaledBlocks[i].Energy * gainEnergyScale;
247+
const float ath = ATH[i] * loudness;
248+
249+
if (correctedEnergy < ath) {
235250
bitsPerEachBlock[i] = 0;
236251
} else {
237252
const uint32_t fix = FixedBitAllocTable[i];
@@ -248,8 +263,10 @@ vector<uint32_t> CalcBitsAllocation(const TAtrac3BitStreamWriter::TSingleChannel
248263
x = 4.2;
249264
}
250265

251-
const int tmp = spread * ((float)scaledBlocks[i].ScaleFactorIndex / x) + (1.0f - spread) * fix - shift
252-
+ gainBoostPerBand[bfuBand];
266+
const float correctedScaleFactorIndex = std::max(0.0f, std::min(63.0f,
267+
static_cast<float>(scaledBlocks[i].ScaleFactorIndex)
268+
+ EnergyScaleToScaleFactorOffset(gainEnergyScale)));
269+
const int tmp = spread * (correctedScaleFactorIndex / x) + (1.0f - spread) * fix - shift;
253270
if (tmp > 7) {
254271
bitsPerEachBlock[i] = 7;
255272
} else if (tmp < 0) {

src/atrac/at3/atrac3_bitstream.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,18 +40,20 @@ struct TTonalBlock {
4040
TScaledBlock ScaledBlock;
4141
};
4242

43+
// Per-band energy ratios (original energy / gain-modulated energy).
// Every field defaults to 1.0f, the neutral ratio.
struct TGainEnergyScale {
    // Ratio for the stored previous-overlap half.
    float PrevHalf = 1.0f;
    // Ratio for the windowed current-input half.
    float CurHalf = 1.0f;
    // Ratio of the summed energies of both halves.
    float Frame = 1.0f;
};
48+
4349
class TAtrac3BitStreamWriter {
4450
public:
4551
struct TSingleChannelElement {
4652
TAtrac3Data::SubbandInfo SubbandInfo;
4753
std::vector<TTonalBlock> TonalBlocks;
4854
std::vector<TScaledBlock> ScaledBlocks;
4955
float Loudness;
50-
// Per-band bit-allocation boost to compensate for gain-demodulation noise
51-
// amplification. Combines the level boost (from the current frame's gain
52-
// curve) and the scale boost (estimated from the next frame's first gain
53-
// point). Set by CreateSubbandInfo; read by the allocation stage.
54-
int GainBoostPerBand[TAtrac3Data::NumQMF] = {};
56+
TGainEnergyScale GainEnergyScale[TAtrac3Data::NumQMF] = {};
5557
};
5658
private:
5759
ICompressedOutput* Container;

src/atrac3denc.cpp

Lines changed: 115 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,8 @@ TAtrac3Encoder::TAtrac3Encoder(TCompressedOutputPtr&& oma, TAtrac3EncoderSetting
9797
, SingleChannelElements(Params.SourceChannels)
9898
, Upsampler(11025.0f, 800.0f)
9999
{
100+
for (auto& ch : PrevOverlapGainScale)
101+
ch.fill(1.0f);
100102
YamlLog = Params.YamlLog;
101103
}
102104

@@ -138,6 +140,89 @@ float TAtrac3Encoder::LimitRel(float x)
138140
return std::min(std::max(x, TAtrac3Data::GainLevel[15]), TAtrac3Data::GainLevel[0]);
139141
}
140142

143+
// Returns originalEnergy / modulatedEnergy, or the neutral ratio 1.0f when the
// ratio cannot be trusted.
//
// The fallback is taken when either energy is at or below kEnergyEps
// (including zero and negative values), when either energy is NaN or
// infinite, or when the quotient itself is not finite and strictly positive
// (for example after overflow).
static float SafeEnergyScale(float originalEnergy, float modulatedEnergy)
{
    static constexpr float kEnergyEps = 1.0e-20f;

    // NaN compares false against kEnergyEps, so it is caught by isfinite.
    const bool tooSmall = originalEnergy <= kEnergyEps || modulatedEnergy <= kEnergyEps;
    const bool notFinite = !std::isfinite(originalEnergy) || !std::isfinite(modulatedEnergy);
    if (tooSmall || notFinite) {
        return 1.0f;
    }

    const float scale = originalEnergy / modulatedEnergy;
    return std::isfinite(scale) && scale > 0.0f ? scale : 1.0f;
}
153+
154+
// Expands a list of gain points into one divisor per sample.
//
// outDiv must point to at least 256 floats. All entries are first set to 1.0f.
// Then, for each gain point in order:
//   * samples from the current position up to (Location << LocScale) receive
//     the point's GainLevel value;
//   * the following LocSz samples receive a ramp: each sample gets the current
//     level, which is then multiplied by the GainInterpolation step selected
//     by the difference between the next point's Level (ExponentOffset for
//     the last point) and this point's Level.
// Samples past the last point's ramp keep the initial 1.0f. Writes never go
// beyond index 255.
//
// NOTE(review): presumably this mirrors the per-sample divisor applied by the
// gain modulator — confirm against TGainModulator.
static void BuildSampleDivisors(const std::vector<TAtrac3Data::SubbandInfo::TGainPoint>& pts, float outDiv[256])
{
    constexpr uint32_t kNumSamples = 256;
    std::fill(outDiv, outDiv + kNumSamples, 1.0f);

    uint32_t pos = 0;
    for (size_t i = 0; i < pts.size(); ++i) {
        const uint32_t lastPos = pts[i].Location << TAtrac3Data::LocScale;
        float level = TAtrac3Data::GainLevel[pts[i].Level];

        // Interpolation step towards the next point's level; the last point
        // ramps towards ExponentOffset.
        const int incPos = ((i + 1) < pts.size() ? pts[i + 1].Level : TAtrac3Data::ExponentOffset)
                           - pts[i].Level + TAtrac3Data::GainInterpolationPosShift;
        const float gainInc = TAtrac3Data::GainInterpolation[incPos];

        // Flat segment up to the gain point location.
        for (; pos < lastPos && pos < kNumSamples; ++pos)
            outDiv[pos] = level;

        // Ramp segment of LocSz samples starting at the gain point location.
        for (; pos < lastPos + TAtrac3Data::LocSz && pos < kNumSamples; ++pos) {
            outDiv[pos] = level;
            level *= gainInc;
        }
    }
}
174+
175+
// Estimates how much gain modulation changes the energy of one band.
//
// Parameters:
//   prevOverlap      - 256 stored samples of the previous overlap half.
//   curInput         - 256 samples of the current input half.
//   gainPoints       - gain points of the current frame for this band.
//   prevOverlapScale - original/modulated energy ratio carried over from the
//                      previous call (its NextOverlapScale); non-finite or
//                      non-positive values are treated as 1.0f.
//
// Returns:
//   Scale.PrevHalf   - ratio for the previous-overlap half. Its original
//                      energy is the stored energy times prevOverlapScale;
//                      its modulated energy is the stored energy divided by
//                      the squared level of the first gain point (1.0f when
//                      there are no gain points).
//   Scale.CurHalf    - ratio for the current half, weighted by
//                      EncodeWindow[255 - i] and using per-sample divisors
//                      from BuildSampleDivisors.
//   Scale.Frame      - ratio of the summed original to summed modulated
//                      energies of both halves.
//   NextOverlapScale - ratio for the current input weighted by
//                      EncodeWindow[i].
// Every ratio goes through SafeEnergyScale, so degenerate energies give 1.0f.
TAtrac3MDCT::TGainEnergyAnalysis TAtrac3MDCT::CalcGainEnergyScale(
    const float prevOverlap[256],
    const float curInput[256],
    const std::vector<TAtrac3Data::SubbandInfo::TGainPoint>& gainPoints,
    float prevOverlapScale)
{
    TGainEnergyAnalysis res;
    if (!std::isfinite(prevOverlapScale) || prevOverlapScale <= 0.0f)
        prevOverlapScale = 1.0f;

    // Divisor for the previous half: the level of the first gain point.
    const float prevDiv = gainPoints.empty()
        ? 1.0f
        : TAtrac3Data::GainLevel[gainPoints.front().Level];

    float prevStoredEnergy = 0.0f;
    for (uint32_t i = 0; i < 256; ++i)
        prevStoredEnergy += prevOverlap[i] * prevOverlap[i];

    const float prevOriginalEnergy = prevStoredEnergy * prevOverlapScale;
    const float prevModulatedEnergy = prevStoredEnergy / (prevDiv * prevDiv);

    float sampleDiv[256];
    BuildSampleDivisors(gainPoints, sampleDiv);

    float curOriginalEnergy = 0.0f;
    float curModulatedEnergy = 0.0f;
    float nextOriginalEnergy = 0.0f;
    float nextModulatedEnergy = 0.0f;
    for (uint32_t i = 0; i < 256; ++i) {
        const float cur = curInput[i];
        const float mod = cur / sampleDiv[i];

        // The same samples are weighted twice: with the reversed window for
        // this frame, and with the forward window for the next-overlap ratio.
        const float winCur = TAtrac3Data::EncodeWindow[255 - i];
        const float winNext = TAtrac3Data::EncodeWindow[i];

        const float curWin = cur * winCur;
        const float modCurWin = mod * winCur;
        const float nextWin = cur * winNext;
        const float modNextWin = mod * winNext;

        curOriginalEnergy += curWin * curWin;
        curModulatedEnergy += modCurWin * modCurWin;
        nextOriginalEnergy += nextWin * nextWin;
        nextModulatedEnergy += modNextWin * modNextWin;
    }

    res.Scale.PrevHalf = SafeEnergyScale(prevOriginalEnergy, prevModulatedEnergy);
    res.Scale.CurHalf = SafeEnergyScale(curOriginalEnergy, curModulatedEnergy);
    res.Scale.Frame = SafeEnergyScale(prevOriginalEnergy + curOriginalEnergy,
                                      prevModulatedEnergy + curModulatedEnergy);
    res.NextOverlapScale = SafeEnergyScale(nextOriginalEnergy, nextModulatedEnergy);
    return res;
}
225+
141226
// Build 32 subframe-average divisors (gain levels) that Modulate would apply
142227
// to bufNext for a given curve.
143228
static void BuildSubframeDivisors(const std::vector<TGainCurvePoint>& pts, float outDiv[32]) {
@@ -213,12 +298,8 @@ static float CalcCurveEarlyMismatchScore(const std::vector<float>& gain,
213298

214299
void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
215300
uint32_t channel,
216-
TAtrac3Data::SubbandInfo* subbandInfo,
217-
int gainBoostPerBand[TAtrac3Data::NumQMF])
301+
TAtrac3Data::SubbandInfo* subbandInfo)
218302
{
219-
static constexpr float kLowOverlapRelax = 0.6f; // allow softer min level when overlap is small
220-
static constexpr int kLevelBoostCap = 1; // cap level boost to reduce bit starvation
221-
static constexpr int kScaleBoostCap = 2; // allow extra scale boost in low-risk cases
222303
static constexpr float kMinScore = 1.9f;
223304

224305
// YAML: channel header (one channel per CreateSubbandInfo call)
@@ -315,7 +396,6 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
315396
if (YamlLog) {
316397
*YamlLog << " skip: no_curve\n";
317398
}
318-
gainBoostPerBand[band] = 0;
319399
continue;
320400
}
321401

@@ -329,8 +409,6 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
329409

330410
float maxGain = 0.0f;
331411
for (float g : gain) maxGain = std::max(maxGain, g);
332-
const float frameEndLevel = gain.back();
333-
const float ratio = maxGain / (frameEndLevel + 1e-9f);
334412

335413
// Minimum signal gate: suppress curves on near-silent frames.
336414
// Firing on noise-floor content wastes bitrate and can produce extreme
@@ -342,7 +420,6 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
342420
if (YamlLog)
343421
*YamlLog << std::fixed << std::setprecision(6)
344422
<< " skip: below_min_signal # maxGain " << maxGain << "\n";
345-
gainBoostPerBand[band] = 0;
346423
curvePoints.clear();
347424
}
348425

@@ -354,51 +431,12 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
354431
if (result.highFreqRatio < kMinHfrForAmplify) {
355432
if (YamlLog)
356433
*YamlLog << " skip: amplify_low_hfr\n";
357-
gainBoostPerBand[band] = 0;
358434
curvePoints.clear();
359435
}
360436

361-
int levelBoost = 0;
362-
363-
// Scale boost: compensate for Demodulate's `scale = GainLevel[giNext[0].Level]`.
364-
// When decoding frame N, scale = GainLevel[frame N+1's first gain point Level].
365-
// Frame N+1's CalcCurve: scaleLevel = RelationToIdx(gain.back()_N / nextLevel_{N+2}).
366-
// We have the full frame N+1 in the lookahead [3072..5119]. Use min(lookaheadGain)
367-
// as a conservative proxy for nextLevel_{N+2} (≈ quietest level reachable in N+1,
368-
// a lower bound on frame N+2's start level).
369-
int scaleBoost = 0;
370-
{
371-
static constexpr size_t kLookaheadOffset = 3072;
372-
const size_t outSz = result.signal.size();
373-
if (outSz > kLookaheadOffset + 64) {
374-
const uint32_t lookaheadPoints =
375-
static_cast<uint32_t>(std::min<size_t>(1024, outSz - kLookaheadOffset) / 64);
376-
if (lookaheadPoints > 0) {
377-
const auto lookaheadGain = AnalyzeGain(result.signal.data() + kLookaheadOffset,
378-
lookaheadPoints * 64,
379-
lookaheadPoints, true);
380-
const float lookaheadMin = *std::min_element(lookaheadGain.begin(), lookaheadGain.end());
381-
if (lookaheadMin > 1e-6f) {
382-
const uint32_t estimatedNextScaleLevel = RelationToIdx(frameEndLevel / lookaheadMin);
383-
if (estimatedNextScaleLevel < 4u)
384-
scaleBoost = static_cast<int>(4u - estimatedNextScaleLevel);
385-
}
386-
}
387-
}
388-
}
389-
390-
const int scaleCap = (overlapRatio < kLowOverlapRelax) ? kScaleBoostCap : kLevelBoostCap;
391-
scaleBoost = std::min(scaleBoost, scaleCap);
392-
const int totalBoost = std::min(levelBoost + scaleBoost, kLevelBoostCap);
393-
394437
if (YamlLog) {
395438
*YamlLog << std::fixed << std::setprecision(4)
396-
<< " max_gain: " << maxGain << "\n"
397-
<< " ratio: " << ratio
398-
<< " # max_gain/frame_end_level, transient strength\n"
399-
<< " level_boost: " << levelBoost << "\n"
400-
<< " scale_boost: " << scaleBoost << "\n"
401-
<< " total_boost: " << totalBoost << "\n";
439+
<< " max_gain: " << maxGain << "\n";
402440
}
403441

404442
// Band 3 is above ~16 kHz where pre-echo is largely inaudible.
@@ -408,17 +446,9 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
408446
*YamlLog << " skip: band_ge_3"
409447
<< " # inaudible HF; gain modulation disabled\n";
410448
}
411-
gainBoostPerBand[band] = 0;
412449
curvePoints.clear();
413450
}
414451

415-
if (band < 3) {
416-
if (YamlLog)
417-
*YamlLog << " gain_boost: " << totalBoost << "\n";
418-
gainBoostPerBand[band] = totalBoost;
419-
}
420-
421-
422452
// Explicit point 0: correct cross-frame energy step in the HPF domain.
423453
// Compare prevTarget (what the previous frame's curve was targeting, in the
424454
// HPF gain[] domain) against the mean HPF level of the pre-ramp zone of
@@ -727,6 +757,8 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda()
727757
sce->TonalBlocks.clear();
728758

729759
sce->SubbandInfo.Reset();
760+
for (auto& scale : sce->GainEnergyScale)
761+
scale = TGainEnergyScale{};
730762
if (!Params.NoGainControll) {
731763
// upInput[b]:
732764
// [0..127] prev tail (last 128 of previous frame)
@@ -741,9 +773,30 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda()
741773
jsStereo ? jsGainInput[channel][2] : LookAheadBuf[channel][2],
742774
jsStereo ? jsGainInput[channel][3] : LookAheadBuf[channel][3]
743775
};
744-
std::fill(sce->GainBoostPerBand,
745-
sce->GainBoostPerBand + TAtrac3Data::NumQMF, 0);
746-
CreateSubbandInfo(up, channel, &sce->SubbandInfo, sce->GainBoostPerBand);
776+
CreateSubbandInfo(up, channel, &sce->SubbandInfo);
777+
}
778+
779+
for (uint32_t band = 0; band < TAtrac3Data::NumQMF; ++band) {
780+
const uint32_t qmfIdx = channel + band * 2;
781+
const auto gainEnergy = CalcGainEnergyScale(PcmBuffer.GetFirst(qmfIdx),
782+
PcmBuffer.GetSecond(qmfIdx),
783+
sce->SubbandInfo.GetGainPoints(band),
784+
PrevOverlapGainScale[channel][band]);
785+
sce->GainEnergyScale[band] = gainEnergy.Scale;
786+
PrevOverlapGainScale[channel][band] = gainEnergy.NextOverlapScale;
787+
}
788+
if (YamlLog && !Params.NoGainControll) {
789+
*YamlLog << std::fixed << std::setprecision(6)
790+
<< " gain_energy_scale:\n";
791+
for (uint32_t band = 0; band < TAtrac3Data::NumQMF; ++band) {
792+
const auto& scale = sce->GainEnergyScale[band];
793+
*YamlLog << " - {band: " << band
794+
<< ", prev_half: " << scale.PrevHalf
795+
<< ", cur_half: " << scale.CurHalf
796+
<< ", frame: " << scale.Frame
797+
<< ", next_overlap: " << PrevOverlapGainScale[channel][band]
798+
<< "}\n";
799+
}
747800
}
748801

749802
float* maxOverlapLevels = PrevPeak[channel];
@@ -760,7 +813,8 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda()
760813
for (size_t i = 0; i < specs.size(); i++) {
761814
float e = specs[i] * specs[i];
762815
mdctEnergy[i] = e;
763-
l += e * LoudnessCurve[i];
816+
const uint32_t band = static_cast<uint32_t>(i / 256);
817+
l += e * sce->GainEnergyScale[band].Frame * LoudnessCurve[i];
764818
}
765819

766820
sce->Loudness = l;

src/atrac3denc.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,17 @@ class TAtrac3MDCT {
6666
public:
6767
using TGainModulator = TAtrac3GainProcessor::TGainModulator;
6868
using TGainDemodulator = TAtrac3GainProcessor::TGainDemodulator;
69+
struct TGainEnergyAnalysis {
70+
NAtrac3::TGainEnergyScale Scale;
71+
float NextOverlapScale = 1.0f;
72+
};
6973
typedef std::array<TGainDemodulator, 4> TGainDemodulatorArray;
7074
typedef std::array<TGainModulator, 4> TGainModulatorArray;
75+
static TGainEnergyAnalysis CalcGainEnergyScale(
76+
const float prevOverlap[256],
77+
const float curInput[256],
78+
const std::vector<TAtrac3Data::SubbandInfo::TGainPoint>& gainPoints,
79+
float prevOverlapScale);
7180
void Mdct(float specs[1024],
7281
float* bands[4],
7382
float maxLevels[4],
@@ -95,6 +104,7 @@ class TAtrac3Encoder : public IProcessor, public TAtrac3MDCT {
95104

96105
TScaler<TAtrac3Data> Scaler;
97106
std::vector<NAtrac3::TAtrac3BitStreamWriter::TSingleChannelElement> SingleChannelElements;
107+
std::array<std::array<float, NAtrac3::TAtrac3Data::NumQMF>, 2> PrevOverlapGainScale;
98108
private:
99109
bool LookAheadPending = true;
100110
// [channel][band][prev_128 | current_256 | lookahead_256]
@@ -111,8 +121,7 @@ class TAtrac3Encoder : public IProcessor, public TAtrac3MDCT {
111121
#endif
112122
float LimitRel(float x);
113123
void CreateSubbandInfo(const float* upInput[4], uint32_t channel,
114-
TAtrac3Data::SubbandInfo* subbandInfo,
115-
int gainBoostPerBand[TAtrac3Data::NumQMF]);
124+
TAtrac3Data::SubbandInfo* subbandInfo);
116125
TAtrac3Data::TTonalComponents ExtractTonalComponents(float* specs,
117126
const std::vector<float>& flatnessPerBfu);
118127
void Matrixing();

0 commit comments

Comments
 (0)