aaccoder_twoloop.h 35 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763
  1. /*
  2. * AAC encoder twoloop coder
  3. * Copyright (C) 2008-2009 Konstantin Shishkov
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * AAC encoder twoloop coder
  24. * @author Konstantin Shishkov, Claudio Freire
  25. */
/**
 * This file contains a template for the twoloop coder function.
 * The following functions from aacenc_quantization/util.h must already be
 * declared by the including file. They are not included explicitly here so
 * that alternative implementations can be provided:
 *  - quantize_band_cost
 *  - abs_pow34_v
 *  - find_max_val
 *  - find_min_book
 *  - find_form_factor
 */
  37. #ifndef AVCODEC_AACCODER_TWOLOOP_H
  38. #define AVCODEC_AACCODER_TWOLOOP_H
  39. #include <float.h>
  40. #include "libavutil/mathematics.h"
  41. #include "mathops.h"
  42. #include "avcodec.h"
  43. #include "put_bits.h"
  44. #include "aac.h"
  45. #include "aacenc.h"
  46. #include "aactab.h"
  47. #include "aacenctab.h"
  48. /** Frequency in Hz for lower limit of noise substitution **/
  49. #define NOISE_LOW_LIMIT 4000
  50. #define sclip(x) av_clip(x,60,218)
  51. /* Reflects the cost to change codebooks */
  52. static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
  53. {
  54. return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
  55. }
  56. /**
  57. * two-loop quantizers search taken from ISO 13818-7 Appendix C
  58. */
  59. static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  60. AACEncContext *s,
  61. SingleChannelElement *sce,
  62. const float lambda)
  63. {
  64. int start = 0, i, w, w2, g, recomprd;
  65. int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  66. / ((avctx->flags & AV_CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
  67. * (lambda / 120.f);
  68. int refbits = destbits;
  69. int toomanybits, toofewbits;
  70. char nzs[128];
  71. uint8_t nextband[128];
  72. int maxsf[128], minsf[128];
  73. float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
  74. float maxvals[128], spread_thr_r[128];
  75. float min_spread_thr_r, max_spread_thr_r;
  76. /**
  77. * rdlambda controls the maximum tolerated distortion. Twoloop
  78. * will keep iterating until it fails to lower it or it reaches
  79. * ulimit * rdlambda. Keeping it low increases quality on difficult
  80. * signals, but lower it too much, and bits will be taken from weak
  81. * signals, creating "holes". A balance is necessary.
  82. * rdmax and rdmin specify the relative deviation from rdlambda
  83. * allowed for tonality compensation
  84. */
  85. float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
  86. const float nzslope = 1.5f;
  87. float rdmin = 0.03125f;
  88. float rdmax = 1.0f;
  89. /**
  90. * sfoffs controls an offset of optmium allocation that will be
  91. * applied based on lambda. Keep it real and modest, the loop
  92. * will take care of the rest, this just accelerates convergence
  93. */
  94. float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
  95. int fflag, minscaler, maxscaler, nminscaler;
  96. int its = 0;
  97. int maxits = 30;
  98. int allz = 0;
  99. int tbits;
  100. int cutoff = 1024;
  101. int pns_start_pos;
  102. int prev;
  103. /**
  104. * zeroscale controls a multiplier of the threshold, if band energy
  105. * is below this, a zero is forced. Keep it lower than 1, unless
  106. * low lambda is used, because energy < threshold doesn't mean there's
  107. * no audible signal outright, it's just energy. Also make it rise
  108. * slower than rdlambda, as rdscale has due compensation with
  109. * noisy band depriorization below, whereas zeroing logic is rather dumb
  110. */
  111. float zeroscale;
  112. if (lambda > 120.f) {
  113. zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
  114. } else {
  115. zeroscale = 1.f;
  116. }
  117. if (s->psy.bitres.alloc >= 0) {
  118. /**
  119. * Psy granted us extra bits to use, from the reservoire
  120. * adjust for lambda except what psy already did
  121. */
  122. destbits = s->psy.bitres.alloc
  123. * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
  124. }
  125. if (avctx->flags & AV_CODEC_FLAG_QSCALE) {
  126. /**
  127. * Constant Q-scale doesn't compensate MS coding on its own
  128. * No need to be overly precise, this only controls RD
  129. * adjustment CB limits when going overboard
  130. */
  131. if (s->options.mid_side && s->cur_type == TYPE_CPE)
  132. destbits *= 2;
  133. /**
  134. * When using a constant Q-scale, don't adjust bits, just use RD
  135. * Don't let it go overboard, though... 8x psy target is enough
  136. */
  137. toomanybits = 5800;
  138. toofewbits = destbits / 16;
  139. /** Don't offset scalers, just RD */
  140. sfoffs = sce->ics.num_windows - 1;
  141. rdlambda = sqrtf(rdlambda);
  142. /** search further */
  143. maxits *= 2;
  144. } else {
  145. /* When using ABR, be strict, but a reasonable leeway is
  146. * critical to allow RC to smoothly track desired bitrate
  147. * without sudden quality drops that cause audible artifacts.
  148. * Symmetry is also desirable, to avoid systematic bias.
  149. */
  150. toomanybits = destbits + destbits/8;
  151. toofewbits = destbits - destbits/8;
  152. sfoffs = 0;
  153. rdlambda = sqrtf(rdlambda);
  154. }
  155. /** and zero out above cutoff frequency */
  156. {
  157. int wlen = 1024 / sce->ics.num_windows;
  158. int bandwidth;
  159. /**
  160. * Scale, psy gives us constant quality, this LP only scales
  161. * bitrate by lambda, so we save bits on subjectively unimportant HF
  162. * rather than increase quantization noise. Adjust nominal bitrate
  163. * to effective bitrate according to encoding parameters,
  164. * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
  165. */
  166. float rate_bandwidth_multiplier = 1.5f;
  167. int frame_bit_rate = (avctx->flags & AV_CODEC_FLAG_QSCALE)
  168. ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
  169. : (avctx->bit_rate / avctx->channels);
  170. /** Compensate for extensions that increase efficiency */
  171. if (s->options.pns || s->options.intensity_stereo)
  172. frame_bit_rate *= 1.15f;
  173. if (avctx->cutoff > 0) {
  174. bandwidth = avctx->cutoff;
  175. } else {
  176. bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
  177. s->psy.cutoff = bandwidth;
  178. }
  179. cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
  180. pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
  181. }
  182. /**
  183. * for values above this the decoder might end up in an endless loop
  184. * due to always having more bits than what can be encoded.
  185. */
  186. destbits = FFMIN(destbits, 5800);
  187. toomanybits = FFMIN(toomanybits, 5800);
  188. toofewbits = FFMIN(toofewbits, 5800);
  189. /**
  190. * XXX: some heuristic to determine initial quantizers will reduce search time
  191. * determine zero bands and upper distortion limits
  192. */
  193. min_spread_thr_r = -1;
  194. max_spread_thr_r = -1;
  195. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  196. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  197. int nz = 0;
  198. float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
  199. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  200. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  201. if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
  202. sce->zeroes[(w+w2)*16+g] = 1;
  203. continue;
  204. }
  205. nz = 1;
  206. }
  207. if (!nz) {
  208. uplim = 0.0f;
  209. } else {
  210. nz = 0;
  211. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  212. FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
  213. if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
  214. continue;
  215. uplim += band->threshold;
  216. energy += band->energy;
  217. spread += band->spread;
  218. nz++;
  219. }
  220. }
  221. uplims[w*16+g] = uplim;
  222. energies[w*16+g] = energy;
  223. nzs[w*16+g] = nz;
  224. sce->zeroes[w*16+g] = !nz;
  225. allz |= nz;
  226. if (nz && sce->can_pns[w*16+g]) {
  227. spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
  228. if (min_spread_thr_r < 0) {
  229. min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
  230. } else {
  231. min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
  232. max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
  233. }
  234. }
  235. }
  236. }
  237. /** Compute initial scalers */
  238. minscaler = 65535;
  239. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  240. for (g = 0; g < sce->ics.num_swb; g++) {
  241. if (sce->zeroes[w*16+g]) {
  242. sce->sf_idx[w*16+g] = SCALE_ONE_POS;
  243. continue;
  244. }
  245. /**
  246. * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
  247. * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
  248. * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
  249. * more robust.
  250. */
  251. sce->sf_idx[w*16+g] = av_clip(
  252. SCALE_ONE_POS
  253. + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
  254. + sfoffs,
  255. 60, SCALE_MAX_POS);
  256. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  257. }
  258. }
  259. /** Clip */
  260. minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  261. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
  262. for (g = 0; g < sce->ics.num_swb; g++)
  263. if (!sce->zeroes[w*16+g])
  264. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
  265. if (!allz)
  266. return;
  267. s->abs_pow34(s->scoefs, sce->coeffs, 1024);
  268. ff_quantize_band_cost_cache_init(s);
  269. for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i)
  270. minsf[i] = 0;
  271. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  272. start = w*128;
  273. for (g = 0; g < sce->ics.num_swb; g++) {
  274. const float *scaled = s->scoefs + start;
  275. int minsfidx;
  276. maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
  277. if (maxvals[w*16+g] > 0) {
  278. minsfidx = coef2minsf(maxvals[w*16+g]);
  279. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
  280. minsf[(w+w2)*16+g] = minsfidx;
  281. }
  282. start += sce->ics.swb_sizes[g];
  283. }
  284. }
  285. /**
  286. * Scale uplims to match rate distortion to quality
  287. * bu applying noisy band depriorization and tonal band priorization.
  288. * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
  289. * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
  290. * rate distortion requirements.
  291. */
  292. memcpy(euplims, uplims, sizeof(euplims));
  293. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  294. /** psy already priorizes transients to some extent */
  295. float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
  296. start = w*128;
  297. for (g = 0; g < sce->ics.num_swb; g++) {
  298. if (nzs[g] > 0) {
  299. float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
  300. float energy2uplim = find_form_factor(
  301. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  302. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  303. sce->coeffs + start,
  304. nzslope * cleanup_factor);
  305. energy2uplim *= de_psy_factor;
  306. if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
  307. /** In ABR, we need to priorize less and let rate control do its thing */
  308. energy2uplim = sqrtf(energy2uplim);
  309. }
  310. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  311. uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
  312. * sce->ics.group_len[w];
  313. energy2uplim = find_form_factor(
  314. sce->ics.group_len[w], sce->ics.swb_sizes[g],
  315. uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
  316. sce->coeffs + start,
  317. 2.0f);
  318. energy2uplim *= de_psy_factor;
  319. if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
  320. /** In ABR, we need to priorize less and let rate control do its thing */
  321. energy2uplim = sqrtf(energy2uplim);
  322. }
  323. energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
  324. euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
  325. 0.5f, 1.0f);
  326. }
  327. start += sce->ics.swb_sizes[g];
  328. }
  329. }
  330. for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
  331. maxsf[i] = SCALE_MAX_POS;
  332. //perform two-loop search
  333. //outer loop - improve quality
  334. do {
  335. //inner loop - quantize spectrum to fit into given number of bits
  336. int overdist;
  337. int qstep = its ? 1 : 32;
  338. do {
  339. int changed = 0;
  340. prev = -1;
  341. recomprd = 0;
  342. tbits = 0;
  343. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  344. start = w*128;
  345. for (g = 0; g < sce->ics.num_swb; g++) {
  346. const float *coefs = &sce->coeffs[start];
  347. const float *scaled = &s->scoefs[start];
  348. int bits = 0;
  349. int cb;
  350. float dist = 0.0f;
  351. float qenergy = 0.0f;
  352. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  353. start += sce->ics.swb_sizes[g];
  354. if (sce->can_pns[w*16+g]) {
  355. /** PNS isn't free */
  356. tbits += ff_pns_bits(sce, w, g);
  357. }
  358. continue;
  359. }
  360. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  361. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  362. int b;
  363. float sqenergy;
  364. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  365. scaled + w2*128,
  366. sce->ics.swb_sizes[g],
  367. sce->sf_idx[w*16+g],
  368. cb,
  369. 1.0f,
  370. INFINITY,
  371. &b, &sqenergy,
  372. 0);
  373. bits += b;
  374. qenergy += sqenergy;
  375. }
  376. dists[w*16+g] = dist - bits;
  377. qenergies[w*16+g] = qenergy;
  378. if (prev != -1) {
  379. int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
  380. bits += ff_aac_scalefactor_bits[sfdiff];
  381. }
  382. tbits += bits;
  383. start += sce->ics.swb_sizes[g];
  384. prev = sce->sf_idx[w*16+g];
  385. }
  386. }
  387. if (tbits > toomanybits) {
  388. recomprd = 1;
  389. for (i = 0; i < 128; i++) {
  390. if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
  391. int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
  392. int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
  393. if (new_sf != sce->sf_idx[i]) {
  394. sce->sf_idx[i] = new_sf;
  395. changed = 1;
  396. }
  397. }
  398. }
  399. } else if (tbits < toofewbits) {
  400. recomprd = 1;
  401. for (i = 0; i < 128; i++) {
  402. if (sce->sf_idx[i] > SCALE_ONE_POS) {
  403. int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep);
  404. if (new_sf != sce->sf_idx[i]) {
  405. sce->sf_idx[i] = new_sf;
  406. changed = 1;
  407. }
  408. }
  409. }
  410. }
  411. qstep >>= 1;
  412. if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
  413. qstep = 1;
  414. } while (qstep);
  415. overdist = 1;
  416. fflag = tbits < toofewbits;
  417. for (i = 0; i < 2 && (overdist || recomprd); ++i) {
  418. if (recomprd) {
  419. /** Must recompute distortion */
  420. prev = -1;
  421. tbits = 0;
  422. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  423. start = w*128;
  424. for (g = 0; g < sce->ics.num_swb; g++) {
  425. const float *coefs = sce->coeffs + start;
  426. const float *scaled = s->scoefs + start;
  427. int bits = 0;
  428. int cb;
  429. float dist = 0.0f;
  430. float qenergy = 0.0f;
  431. if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
  432. start += sce->ics.swb_sizes[g];
  433. if (sce->can_pns[w*16+g]) {
  434. /** PNS isn't free */
  435. tbits += ff_pns_bits(sce, w, g);
  436. }
  437. continue;
  438. }
  439. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  440. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  441. int b;
  442. float sqenergy;
  443. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  444. scaled + w2*128,
  445. sce->ics.swb_sizes[g],
  446. sce->sf_idx[w*16+g],
  447. cb,
  448. 1.0f,
  449. INFINITY,
  450. &b, &sqenergy,
  451. 0);
  452. bits += b;
  453. qenergy += sqenergy;
  454. }
  455. dists[w*16+g] = dist - bits;
  456. qenergies[w*16+g] = qenergy;
  457. if (prev != -1) {
  458. int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
  459. bits += ff_aac_scalefactor_bits[sfdiff];
  460. }
  461. tbits += bits;
  462. start += sce->ics.swb_sizes[g];
  463. prev = sce->sf_idx[w*16+g];
  464. }
  465. }
  466. }
  467. if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
  468. float maxoverdist = 0.0f;
  469. float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
  470. overdist = recomprd = 0;
  471. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  472. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  473. if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
  474. float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
  475. maxoverdist = FFMAX(maxoverdist, ovrdist);
  476. overdist++;
  477. }
  478. }
  479. }
  480. if (overdist) {
  481. /* We have overdistorted bands, trade for zeroes (that can be noise)
  482. * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
  483. */
  484. float minspread = max_spread_thr_r;
  485. float maxspread = min_spread_thr_r;
  486. float zspread;
  487. int zeroable = 0;
  488. int zeroed = 0;
  489. int maxzeroed, zloop;
  490. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  491. for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
  492. if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
  493. minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
  494. maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
  495. zeroable++;
  496. }
  497. }
  498. }
  499. zspread = (maxspread-minspread) * 0.0125f + minspread;
  500. /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
  501. * and forced the hand of the later search_for_pns step.
  502. * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
  503. * and leave further PNSing to search_for_pns if worthwhile.
  504. */
  505. zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
  506. ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
  507. maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
  508. for (zloop = 0; zloop < 2; zloop++) {
  509. /* Two passes: first distorted stuff - two birds in one shot and all that,
  510. * then anything viable. Viable means not zero, but either CB=zero-able
  511. * (too high SF), not SF <= 1 (that means we'd be operating at very high
  512. * quality, we don't want PNS when doing VHQ), PNS allowed, and within
  513. * the lowest ranking percentile.
  514. */
  515. float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
  516. int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
  517. int mcb;
  518. for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
  519. if (sce->ics.swb_offset[g] < pns_start_pos)
  520. continue;
  521. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  522. if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
  523. && sce->sf_idx[w*16+g] > loopminsf
  524. && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
  525. || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
  526. sce->zeroes[w*16+g] = 1;
  527. sce->band_type[w*16+g] = 0;
  528. zeroed++;
  529. }
  530. }
  531. }
  532. }
  533. if (zeroed)
  534. recomprd = fflag = 1;
  535. } else {
  536. overdist = 0;
  537. }
  538. }
  539. }
  540. minscaler = SCALE_MAX_POS;
  541. maxscaler = 0;
  542. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  543. for (g = 0; g < sce->ics.num_swb; g++) {
  544. if (!sce->zeroes[w*16+g]) {
  545. minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
  546. maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
  547. }
  548. }
  549. }
  550. minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
  551. prev = -1;
  552. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  553. /** Start with big steps, end up fine-tunning */
  554. int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
  555. int edepth = depth+2;
  556. float uplmax = its / (maxits*0.25f) + 1.0f;
  557. uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
  558. start = w * 128;
  559. for (g = 0; g < sce->ics.num_swb; g++) {
  560. int prevsc = sce->sf_idx[w*16+g];
  561. if (prev < 0 && !sce->zeroes[w*16+g])
  562. prev = sce->sf_idx[0];
  563. if (!sce->zeroes[w*16+g]) {
  564. const float *coefs = sce->coeffs + start;
  565. const float *scaled = s->scoefs + start;
  566. int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  567. int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
  568. int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
  569. if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) {
  570. /* Try to make sure there is some energy in every nonzero band
  571. * NOTE: This algorithm must be forcibly imbalanced, pushing harder
  572. * on holes or more distorted bands at first, otherwise there's
  573. * no net gain (since the next iteration will offset all bands
  574. * on the opposite direction to compensate for extra bits)
  575. */
  576. for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
  577. int cb, bits;
  578. float dist, qenergy;
  579. int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
  580. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  581. dist = qenergy = 0.f;
  582. bits = 0;
  583. if (!cb) {
  584. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
  585. } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
  586. break;
  587. }
  588. /* !g is the DC band, it's important, since quantization error here
  589. * applies to less than a cycle, it creates horrible intermodulation
  590. * distortion if it doesn't stick to what psy requests
  591. */
  592. if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
  593. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
  594. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  595. int b;
  596. float sqenergy;
  597. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  598. scaled + w2*128,
  599. sce->ics.swb_sizes[g],
  600. sce->sf_idx[w*16+g]-1,
  601. cb,
  602. 1.0f,
  603. INFINITY,
  604. &b, &sqenergy,
  605. 0);
  606. bits += b;
  607. qenergy += sqenergy;
  608. }
  609. sce->sf_idx[w*16+g]--;
  610. dists[w*16+g] = dist - bits;
  611. qenergies[w*16+g] = qenergy;
  612. if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
  613. (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
  614. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  615. ) )) {
  616. break;
  617. }
  618. }
  619. } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
  620. && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
  621. && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
  622. ) {
  623. /** Um... over target. Save bits for more important stuff. */
  624. for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
  625. int cb, bits;
  626. float dist, qenergy;
  627. cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
  628. if (cb > 0) {
  629. dist = qenergy = 0.f;
  630. bits = 0;
  631. for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
  632. int b;
  633. float sqenergy;
  634. dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
  635. scaled + w2*128,
  636. sce->ics.swb_sizes[g],
  637. sce->sf_idx[w*16+g]+1,
  638. cb,
  639. 1.0f,
  640. INFINITY,
  641. &b, &sqenergy,
  642. 0);
  643. bits += b;
  644. qenergy += sqenergy;
  645. }
  646. dist -= bits;
  647. if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
  648. sce->sf_idx[w*16+g]++;
  649. dists[w*16+g] = dist;
  650. qenergies[w*16+g] = qenergy;
  651. } else {
  652. break;
  653. }
  654. } else {
  655. maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
  656. break;
  657. }
  658. }
  659. }
  660. prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
  661. if (sce->sf_idx[w*16+g] != prevsc)
  662. fflag = 1;
  663. nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
  664. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  665. }
  666. start += sce->ics.swb_sizes[g];
  667. }
  668. }
  669. /** SF difference limit violation risk. Must re-clamp. */
  670. prev = -1;
  671. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  672. for (g = 0; g < sce->ics.num_swb; g++) {
  673. if (!sce->zeroes[w*16+g]) {
  674. int prevsf = sce->sf_idx[w*16+g];
  675. if (prev < 0)
  676. prev = prevsf;
  677. sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
  678. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  679. prev = sce->sf_idx[w*16+g];
  680. if (!fflag && prevsf != sce->sf_idx[w*16+g])
  681. fflag = 1;
  682. }
  683. }
  684. }
  685. its++;
  686. } while (fflag && its < maxits);
  687. /** Scout out next nonzero bands */
  688. ff_init_nextband_map(sce, nextband);
  689. prev = -1;
  690. for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
  691. /** Make sure proper codebooks are set */
  692. for (g = 0; g < sce->ics.num_swb; g++) {
  693. if (!sce->zeroes[w*16+g]) {
  694. sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
  695. if (sce->band_type[w*16+g] <= 0) {
  696. if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
  697. /** Cannot zero out, make sure it's not attempted */
  698. sce->band_type[w*16+g] = 1;
  699. } else {
  700. sce->zeroes[w*16+g] = 1;
  701. sce->band_type[w*16+g] = 0;
  702. }
  703. }
  704. } else {
  705. sce->band_type[w*16+g] = 0;
  706. }
  707. /** Check that there's no SF delta range violations */
  708. if (!sce->zeroes[w*16+g]) {
  709. if (prev != -1) {
  710. av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
  711. av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
  712. } else if (sce->zeroes[0]) {
  713. /** Set global gain to something useful */
  714. sce->sf_idx[0] = sce->sf_idx[w*16+g];
  715. }
  716. prev = sce->sf_idx[w*16+g];
  717. }
  718. }
  719. }
  720. }
  721. #endif /* AVCODEC_AACCODER_TWOLOOP_H */