@@ -97,6 +97,8 @@ TAtrac3Encoder::TAtrac3Encoder(TCompressedOutputPtr&& oma, TAtrac3EncoderSetting
9797 , SingleChannelElements(Params.SourceChannels)
9898 , Upsampler(11025 .0f , 800 .0f )
9999{
100+ for (auto & ch : PrevOverlapGainScale)
101+ ch.fill (1 .0f );
100102 YamlLog = Params.YamlLog ;
101103}
102104
@@ -138,6 +140,89 @@ float TAtrac3Encoder::LimitRel(float x)
138140 return std::min (std::max (x, TAtrac3Data::GainLevel[15 ]), TAtrac3Data::GainLevel[0 ]);
139141}
140142
// Returns the energy ratio originalEnergy / modulatedEnergy, or the neutral
// value 1.0f whenever that ratio cannot be trusted: either input is
// non-finite, either input is at or below a tiny energy floor, or the
// quotient itself overflows/underflows to a non-finite or non-positive value.
static float SafeEnergyScale(float originalEnergy, float modulatedEnergy)
{
    static constexpr float kEnergyFloor = 1.0e-20f;

    const bool bothFinite = std::isfinite(originalEnergy) && std::isfinite(modulatedEnergy);
    const bool bothAboveFloor = originalEnergy > kEnergyFloor && modulatedEnergy > kEnergyFloor;
    if (!(bothFinite && bothAboveFloor)) {
        return 1.0f;
    }

    const float ratio = originalEnergy / modulatedEnergy;
    // The quotient of two valid floats may still overflow to inf or
    // underflow to zero; fall back to the neutral scale in that case.
    if (!std::isfinite(ratio) || ratio <= 0.0f) {
        return 1.0f;
    }
    return ratio;
}
153+
154+ static void BuildSampleDivisors (const std::vector<TAtrac3Data::SubbandInfo::TGainPoint>& pts, float outDiv[256 ])
155+ {
156+ std::fill (outDiv, outDiv + 256 , 1 .0f );
157+
158+ uint32_t pos = 0 ;
159+ for (size_t i = 0 ; i < pts.size (); ++i) {
160+ const uint32_t lastPos = pts[i].Location << TAtrac3Data::LocScale;
161+ float level = TAtrac3Data::GainLevel[pts[i].Level ];
162+ const int incPos = ((i + 1 ) < pts.size () ? pts[i + 1 ].Level : TAtrac3Data::ExponentOffset)
163+ - pts[i].Level + TAtrac3Data::GainInterpolationPosShift;
164+ const float gainInc = TAtrac3Data::GainInterpolation[incPos];
165+
166+ for (; pos < lastPos && pos < 256 ; ++pos)
167+ outDiv[pos] = level;
168+ for (; pos < lastPos + TAtrac3Data::LocSz && pos < 256 ; ++pos) {
169+ outDiv[pos] = level;
170+ level *= gainInc;
171+ }
172+ }
173+ }
174+
175+ TAtrac3MDCT::TGainEnergyAnalysis TAtrac3MDCT::CalcGainEnergyScale (
176+ const float prevOverlap[256 ],
177+ const float curInput[256 ],
178+ const std::vector<TAtrac3Data::SubbandInfo::TGainPoint>& gainPoints,
179+ float prevOverlapScale)
180+ {
181+ TGainEnergyAnalysis res;
182+ if (!std::isfinite (prevOverlapScale) || prevOverlapScale <= 0 .0f )
183+ prevOverlapScale = 1 .0f ;
184+
185+ const float prevDiv = gainPoints.empty ()
186+ ? 1 .0f
187+ : TAtrac3Data::GainLevel[gainPoints.front ().Level ];
188+
189+ float prevStoredEnergy = 0 .0f ;
190+ for (uint32_t i = 0 ; i < 256 ; ++i)
191+ prevStoredEnergy += prevOverlap[i] * prevOverlap[i];
192+
193+ const float prevOriginalEnergy = prevStoredEnergy * prevOverlapScale;
194+ const float prevModulatedEnergy = prevStoredEnergy / (prevDiv * prevDiv);
195+
196+ float sampleDiv[256 ];
197+ BuildSampleDivisors (gainPoints, sampleDiv);
198+
199+ float curOriginalEnergy = 0 .0f ;
200+ float curModulatedEnergy = 0 .0f ;
201+ float nextOriginalEnergy = 0 .0f ;
202+ float nextModulatedEnergy = 0 .0f ;
203+ for (uint32_t i = 0 ; i < 256 ; ++i) {
204+ const float cur = curInput[i];
205+ const float mod = cur / sampleDiv[i];
206+ const float winCur = TAtrac3Data::EncodeWindow[255 - i];
207+ const float winNext = TAtrac3Data::EncodeWindow[i];
208+ const float curWin = cur * winCur;
209+ const float modCurWin = mod * winCur;
210+ const float nextWin = cur * winNext;
211+ const float modNextWin = mod * winNext;
212+ curOriginalEnergy += curWin * curWin;
213+ curModulatedEnergy += modCurWin * modCurWin;
214+ nextOriginalEnergy += nextWin * nextWin;
215+ nextModulatedEnergy += modNextWin * modNextWin;
216+ }
217+
218+ res.Scale .PrevHalf = SafeEnergyScale (prevOriginalEnergy, prevModulatedEnergy);
219+ res.Scale .CurHalf = SafeEnergyScale (curOriginalEnergy, curModulatedEnergy);
220+ res.Scale .Frame = SafeEnergyScale (prevOriginalEnergy + curOriginalEnergy,
221+ prevModulatedEnergy + curModulatedEnergy);
222+ res.NextOverlapScale = SafeEnergyScale (nextOriginalEnergy, nextModulatedEnergy);
223+ return res;
224+ }
225+
141226// Build 32 subframe-average divisors (gain levels) that Modulate would apply
142227// to bufNext for a given curve.
143228static void BuildSubframeDivisors (const std::vector<TGainCurvePoint>& pts, float outDiv[32 ]) {
@@ -213,12 +298,8 @@ static float CalcCurveEarlyMismatchScore(const std::vector<float>& gain,
213298
214299void TAtrac3Encoder::CreateSubbandInfo (const float * upInput[4 ],
215300 uint32_t channel,
216- TAtrac3Data::SubbandInfo* subbandInfo,
217- int gainBoostPerBand[TAtrac3Data::NumQMF])
301+ TAtrac3Data::SubbandInfo* subbandInfo)
218302{
219- static constexpr float kLowOverlapRelax = 0 .6f ; // allow softer min level when overlap is small
220- static constexpr int kLevelBoostCap = 1 ; // cap level boost to reduce bit starvation
221- static constexpr int kScaleBoostCap = 2 ; // allow extra scale boost in low-risk cases
222303 static constexpr float kMinScore = 1 .9f ;
223304
224305 // YAML: channel header (one channel per CreateSubbandInfo call)
@@ -315,7 +396,6 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
315396 if (YamlLog) {
316397 *YamlLog << " skip: no_curve\n " ;
317398 }
318- gainBoostPerBand[band] = 0 ;
319399 continue ;
320400 }
321401
@@ -329,8 +409,6 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
329409
330410 float maxGain = 0 .0f ;
331411 for (float g : gain) maxGain = std::max (maxGain, g);
332- const float frameEndLevel = gain.back ();
333- const float ratio = maxGain / (frameEndLevel + 1e-9f );
334412
335413 // Minimum signal gate: suppress curves on near-silent frames.
336414 // Firing on noise-floor content wastes bitrate and can produce extreme
@@ -342,7 +420,6 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
342420 if (YamlLog)
343421 *YamlLog << std::fixed << std::setprecision (6 )
344422 << " skip: below_min_signal # maxGain " << maxGain << " \n " ;
345- gainBoostPerBand[band] = 0 ;
346423 curvePoints.clear ();
347424 }
348425
@@ -354,51 +431,12 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
354431 if (result.highFreqRatio < kMinHfrForAmplify ) {
355432 if (YamlLog)
356433 *YamlLog << " skip: amplify_low_hfr\n " ;
357- gainBoostPerBand[band] = 0 ;
358434 curvePoints.clear ();
359435 }
360436
361- int levelBoost = 0 ;
362-
363- // Scale boost: compensate for Demodulate's `scale = GainLevel[giNext[0].Level]`.
364- // When decoding frame N, scale = GainLevel[frame N+1's first gain point Level].
365- // Frame N+1's CalcCurve: scaleLevel = RelationToIdx(gain.back()_N / nextLevel_{N+2}).
366- // We have the full frame N+1 in the lookahead [3072..5119]. Use min(lookaheadGain)
367- // as a conservative proxy for nextLevel_{N+2} (≈ quietest level reachable in N+1,
368- // a lower bound on frame N+2's start level).
369- int scaleBoost = 0 ;
370- {
371- static constexpr size_t kLookaheadOffset = 3072 ;
372- const size_t outSz = result.signal .size ();
373- if (outSz > kLookaheadOffset + 64 ) {
374- const uint32_t lookaheadPoints =
375- static_cast <uint32_t >(std::min<size_t >(1024 , outSz - kLookaheadOffset ) / 64 );
376- if (lookaheadPoints > 0 ) {
377- const auto lookaheadGain = AnalyzeGain (result.signal .data () + kLookaheadOffset ,
378- lookaheadPoints * 64 ,
379- lookaheadPoints, true );
380- const float lookaheadMin = *std::min_element (lookaheadGain.begin (), lookaheadGain.end ());
381- if (lookaheadMin > 1e-6f ) {
382- const uint32_t estimatedNextScaleLevel = RelationToIdx (frameEndLevel / lookaheadMin);
383- if (estimatedNextScaleLevel < 4u )
384- scaleBoost = static_cast <int >(4u - estimatedNextScaleLevel);
385- }
386- }
387- }
388- }
389-
390- const int scaleCap = (overlapRatio < kLowOverlapRelax ) ? kScaleBoostCap : kLevelBoostCap ;
391- scaleBoost = std::min (scaleBoost, scaleCap);
392- const int totalBoost = std::min (levelBoost + scaleBoost, kLevelBoostCap );
393-
394437 if (YamlLog) {
395438 *YamlLog << std::fixed << std::setprecision (4 )
396- << " max_gain: " << maxGain << " \n "
397- << " ratio: " << ratio
398- << " # max_gain/frame_end_level, transient strength\n "
399- << " level_boost: " << levelBoost << " \n "
400- << " scale_boost: " << scaleBoost << " \n "
401- << " total_boost: " << totalBoost << " \n " ;
439+ << " max_gain: " << maxGain << " \n " ;
402440 }
403441
404442 // Band 3 is above ~16 kHz where pre-echo is largely inaudible.
@@ -408,17 +446,9 @@ void TAtrac3Encoder::CreateSubbandInfo(const float* upInput[4],
408446 *YamlLog << " skip: band_ge_3"
409447 << " # inaudible HF; gain modulation disabled\n " ;
410448 }
411- gainBoostPerBand[band] = 0 ;
412449 curvePoints.clear ();
413450 }
414451
415- if (band < 3 ) {
416- if (YamlLog)
417- *YamlLog << " gain_boost: " << totalBoost << " \n " ;
418- gainBoostPerBand[band] = totalBoost;
419- }
420-
421-
422452 // Explicit point 0: correct cross-frame energy step in the HPF domain.
423453 // Compare prevTarget (what the previous frame's curve was targeting, in the
424454 // HPF gain[] domain) against the mean HPF level of the pre-ramp zone of
@@ -727,6 +757,8 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda()
727757 sce->TonalBlocks .clear ();
728758
729759 sce->SubbandInfo .Reset ();
760+ for (auto & scale : sce->GainEnergyScale )
761+ scale = TGainEnergyScale{};
730762 if (!Params.NoGainControll ) {
731763 // upInput[b]:
732764 // [0..127] prev tail (last 128 of previous frame)
@@ -741,9 +773,30 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda()
741773 jsStereo ? jsGainInput[channel][2 ] : LookAheadBuf[channel][2 ],
742774 jsStereo ? jsGainInput[channel][3 ] : LookAheadBuf[channel][3 ]
743775 };
744- std::fill (sce->GainBoostPerBand ,
745- sce->GainBoostPerBand + TAtrac3Data::NumQMF, 0 );
746- CreateSubbandInfo (up, channel, &sce->SubbandInfo , sce->GainBoostPerBand );
776+ CreateSubbandInfo (up, channel, &sce->SubbandInfo );
777+ }
778+
779+ for (uint32_t band = 0 ; band < TAtrac3Data::NumQMF; ++band) {
780+ const uint32_t qmfIdx = channel + band * 2 ;
781+ const auto gainEnergy = CalcGainEnergyScale (PcmBuffer.GetFirst (qmfIdx),
782+ PcmBuffer.GetSecond (qmfIdx),
783+ sce->SubbandInfo .GetGainPoints (band),
784+ PrevOverlapGainScale[channel][band]);
785+ sce->GainEnergyScale [band] = gainEnergy.Scale ;
786+ PrevOverlapGainScale[channel][band] = gainEnergy.NextOverlapScale ;
787+ }
788+ if (YamlLog && !Params.NoGainControll ) {
789+ *YamlLog << std::fixed << std::setprecision (6 )
790+ << " gain_energy_scale:\n " ;
791+ for (uint32_t band = 0 ; band < TAtrac3Data::NumQMF; ++band) {
792+ const auto & scale = sce->GainEnergyScale [band];
793+ *YamlLog << " - {band: " << band
794+ << " , prev_half: " << scale.PrevHalf
795+ << " , cur_half: " << scale.CurHalf
796+ << " , frame: " << scale.Frame
797+ << " , next_overlap: " << PrevOverlapGainScale[channel][band]
798+ << " }\n " ;
799+ }
747800 }
748801
749802 float * maxOverlapLevels = PrevPeak[channel];
@@ -760,7 +813,8 @@ TPCMEngine::TProcessLambda TAtrac3Encoder::GetLambda()
760813 for (size_t i = 0 ; i < specs.size (); i++) {
761814 float e = specs[i] * specs[i];
762815 mdctEnergy[i] = e;
763- l += e * LoudnessCurve[i];
816+ const uint32_t band = static_cast <uint32_t >(i / 256 );
817+ l += e * sce->GainEnergyScale [band].Frame * LoudnessCurve[i];
764818 }
765819
766820 sce->Loudness = l;
0 commit comments